author    Chunseok Lee <chunseok.lee@samsung.com>  2020-09-05 21:49:46 +0900
committer Chunseok Lee <chunseok.lee@samsung.com>  2020-09-05 21:49:46 +0900
commit    74476a2d0296bdad70a2f7f90bc7419a8b05bffd (patch)
tree      3f991636c1e9423d38eb16a384c20b569b0d678e /runtime/onert
parent    042b262b3633b6c0f577aed6cb4b980ad0c1dcf3 (diff)
Diffstat (limited to 'runtime/onert')
-rw-r--r-- runtime/onert/api/include/nnfw.h | 2
-rw-r--r-- runtime/onert/api/include/nnfw_experimental.h | 34
-rw-r--r-- runtime/onert/api/include/nnfw_version.h | 2
-rw-r--r-- runtime/onert/api/src/nnfw_api.cc | 13
-rw-r--r-- runtime/onert/api/src/nnfw_api_internal.cc | 65
-rw-r--r-- runtime/onert/api/src/nnfw_api_internal.h | 11
-rw-r--r-- runtime/onert/backend/acl_cl/Backend.h | 10
-rw-r--r-- runtime/onert/backend/acl_cl/ConstantInitializer.cc | 99
-rw-r--r-- runtime/onert/backend/acl_cl/ConstantInitializer.h | 32
-rw-r--r-- runtime/onert/backend/acl_cl/KernelGenerator.cc | 1238
-rw-r--r-- runtime/onert/backend/acl_cl/KernelGenerator.h | 37
-rw-r--r-- runtime/onert/backend/acl_cl/Optimizer.cc | 2
-rw-r--r-- runtime/onert/backend/acl_cl/TensorManager.h | 2
-rw-r--r-- runtime/onert/backend/acl_common/AclConstantInitializer.cc | 128
-rw-r--r-- runtime/onert/backend/acl_common/AclConstantInitializer.h | 61
-rw-r--r-- runtime/onert/backend/acl_common/AclFunction.h | 6
-rw-r--r-- runtime/onert/backend/acl_common/AclKernelGen.h | 149
-rw-r--r-- runtime/onert/backend/acl_common/AclTensorBuilder.h | 42
-rw-r--r-- runtime/onert/backend/acl_common/AclTensorRegistry.h | 59
-rw-r--r-- runtime/onert/backend/acl_common/Convert.cc | 65
-rw-r--r-- runtime/onert/backend/acl_common/Convert.h | 7
-rw-r--r-- runtime/onert/backend/acl_neon/Backend.h | 9
-rw-r--r-- runtime/onert/backend/acl_neon/ConstantInitializer.cc | 97
-rw-r--r-- runtime/onert/backend/acl_neon/ConstantInitializer.h | 26
-rw-r--r-- runtime/onert/backend/acl_neon/KernelGenerator.cc | 1249
-rw-r--r-- runtime/onert/backend/acl_neon/KernelGenerator.h | 36
-rw-r--r-- runtime/onert/backend/acl_neon/Optimizer.cc | 2
-rw-r--r-- runtime/onert/backend/acl_neon/TensorManager.h | 2
-rw-r--r-- runtime/onert/backend/cpu/Backend.h | 8
-rw-r--r-- runtime/onert/backend/cpu/BackendContext.h | 6
-rw-r--r-- runtime/onert/backend/cpu/ConstantInitializer.cc | 4
-rw-r--r-- runtime/onert/backend/cpu/ConstantInitializer.h | 8
-rw-r--r-- runtime/onert/backend/cpu/KernelGenerator.cc | 851
-rw-r--r-- runtime/onert/backend/cpu/KernelGenerator.h | 34
-rw-r--r-- runtime/onert/backend/cpu/TensorBuilder.cc | 31
-rw-r--r-- runtime/onert/backend/cpu/TensorBuilder.h | 26
-rw-r--r-- runtime/onert/backend/cpu/ops/AbsLayer.cc | 70
-rw-r--r-- runtime/onert/backend/cpu/ops/AbsLayer.h | 57
-rw-r--r-- runtime/onert/backend/cpu/ops/AddLayer.cc | 166
-rw-r--r-- runtime/onert/backend/cpu/ops/AddLayer.h | 67
-rw-r--r-- runtime/onert/backend/cpu/ops/AvgPoolLayer.cc | 118
-rw-r--r-- runtime/onert/backend/cpu/ops/AvgPoolLayer.h | 75
-rw-r--r-- runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc | 225
-rw-r--r-- runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.h (renamed from runtime/onert/backend/cpu/ops/DivLayer.h) | 28
-rw-r--r-- runtime/onert/backend/cpu/ops/CastLayer.cc | 112
-rw-r--r-- runtime/onert/backend/cpu/ops/CastLayer.h | 57
-rw-r--r-- runtime/onert/backend/cpu/ops/ConvolutionLayer.cc | 21
-rw-r--r-- runtime/onert/backend/cpu/ops/ConvolutionLayer.h | 5
-rw-r--r-- runtime/onert/backend/cpu/ops/CosLayer.cc | 68
-rw-r--r-- runtime/onert/backend/cpu/ops/CosLayer.h | 54
-rw-r--r-- runtime/onert/backend/cpu/ops/DivLayer.cc | 95
-rw-r--r-- runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc | 173
-rw-r--r-- runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h (renamed from runtime/onert/backend/cpu/ops/TanhLayer.h) | 29
-rw-r--r-- runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc | 151
-rw-r--r-- runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.h (renamed from runtime/onert/backend/cpu/ops/MaxLayer.h) | 26
-rw-r--r-- runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc | 336
-rw-r--r-- runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h (renamed from runtime/onert/backend/cpu/ops/ReLU6Layer.h) | 38
-rw-r--r-- runtime/onert/backend/cpu/ops/ExpLayer.cc | 74
-rw-r--r-- runtime/onert/backend/cpu/ops/LogLayer.cc | 70
-rw-r--r-- runtime/onert/backend/cpu/ops/LogLayer.h | 57
-rw-r--r-- runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc | 26
-rw-r--r-- runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h | 3
-rw-r--r-- runtime/onert/backend/cpu/ops/LogicalNotLayer.cc | 64
-rw-r--r-- runtime/onert/backend/cpu/ops/LogicalNotLayer.h | 56
-rw-r--r-- runtime/onert/backend/cpu/ops/LogicalOrLayer.cc | 76
-rw-r--r-- runtime/onert/backend/cpu/ops/LogicalOrLayer.h | 59
-rw-r--r-- runtime/onert/backend/cpu/ops/LogisticLayer.cc | 108
-rw-r--r-- runtime/onert/backend/cpu/ops/LogisticLayer.h | 60
-rw-r--r-- runtime/onert/backend/cpu/ops/MaxLayer.cc | 85
-rw-r--r-- runtime/onert/backend/cpu/ops/MaxPoolLayer.cc | 115
-rw-r--r-- runtime/onert/backend/cpu/ops/MinLayer.cc | 89
-rw-r--r-- runtime/onert/backend/cpu/ops/MinLayer.h | 61
-rw-r--r-- runtime/onert/backend/cpu/ops/MulLayer.cc | 116
-rw-r--r-- runtime/onert/backend/cpu/ops/MulLayer.h | 65
-rw-r--r-- runtime/onert/backend/cpu/ops/NegLayer.cc | 70
-rw-r--r-- runtime/onert/backend/cpu/ops/NegLayer.h | 57
-rw-r--r-- runtime/onert/backend/cpu/ops/PoolLayer.cc | 132
-rw-r--r-- runtime/onert/backend/cpu/ops/PoolLayer.h (renamed from runtime/onert/backend/cpu/ops/MaxPoolLayer.h) | 37
-rw-r--r-- runtime/onert/backend/cpu/ops/QuantizeLayer.cc | 63
-rw-r--r-- runtime/onert/backend/cpu/ops/QuantizeLayer.h | 56
-rw-r--r-- runtime/onert/backend/cpu/ops/RankLayer.cc (renamed from runtime/onert/backend/cpu/ops/RoundLayer.cc) | 24
-rw-r--r-- runtime/onert/backend/cpu/ops/RankLayer.h (renamed from runtime/onert/backend/cpu/ops/ZerosLikeLayer.h) | 12
-rw-r--r-- runtime/onert/backend/cpu/ops/ReLU6Layer.cc | 74
-rw-r--r-- runtime/onert/backend/cpu/ops/ReLULayer.cc | 74
-rw-r--r-- runtime/onert/backend/cpu/ops/ReLULayer.h | 57
-rw-r--r-- runtime/onert/backend/cpu/ops/ReduceLayer.cc | 90
-rw-r--r-- runtime/onert/backend/cpu/ops/ReduceLayer.h | 5
-rw-r--r-- runtime/onert/backend/cpu/ops/RoundLayer.h | 54
-rw-r--r-- runtime/onert/backend/cpu/ops/RsqrtLayer.cc | 69
-rw-r--r-- runtime/onert/backend/cpu/ops/RsqrtLayer.h | 53
-rw-r--r-- runtime/onert/backend/cpu/ops/SinLayer.cc | 68
-rw-r--r-- runtime/onert/backend/cpu/ops/SinLayer.h | 54
-rw-r--r-- runtime/onert/backend/cpu/ops/SoftMaxLayer.cc | 50
-rw-r--r-- runtime/onert/backend/cpu/ops/SubLayer.cc | 162
-rw-r--r-- runtime/onert/backend/cpu/ops/SubLayer.h | 67
-rw-r--r-- runtime/onert/backend/cpu/ops/TanhLayer.cc | 103
-rw-r--r-- runtime/onert/backend/cpu/ops/ZerosLikeLayer.cc | 63
-rw-r--r-- runtime/onert/core/include/backend/BackendContext.h | 9
-rw-r--r-- runtime/onert/core/include/backend/IConstantInitializer.h | 9
-rw-r--r-- runtime/onert/core/include/backend/ITensorBuilder.h | 50
-rw-r--r-- runtime/onert/core/include/backend/ITensorRegistry.h | 31
-rw-r--r-- runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h | 5
-rw-r--r-- runtime/onert/core/include/compiler/LoweredGraph.h | 90
-rw-r--r-- runtime/onert/core/include/compiler/StaticShapeInference.h | 52
-rw-r--r-- runtime/onert/core/include/exec/DynamicShapeInference.h | 35
-rw-r--r-- runtime/onert/core/include/exec/IExecutor.h | 2
-rw-r--r-- runtime/onert/core/include/exec/IODescription.h | 4
-rw-r--r-- runtime/onert/core/include/ir/Graph.h | 8
-rw-r--r-- runtime/onert/core/include/ir/InternalType.h | 6
-rw-r--r-- runtime/onert/core/include/ir/LoweredGraph.h | 87
-rw-r--r-- runtime/onert/core/include/ir/OpSequences.h | 15
-rw-r--r-- runtime/onert/core/include/ir/Operations.Include.h | 38
-rw-r--r-- runtime/onert/core/include/ir/Operations.lst | 38
-rw-r--r-- runtime/onert/core/include/ir/Padding.h | 3
-rw-r--r-- runtime/onert/core/include/ir/operation/Abs.h | 49
-rw-r--r-- runtime/onert/core/include/ir/operation/BinaryArithmetic.h (renamed from runtime/onert/core/include/ir/operation/Add.h) | 25
-rw-r--r-- runtime/onert/core/include/ir/operation/BroadcastTo.h | 2
-rw-r--r-- runtime/onert/core/include/ir/operation/Cast.h | 49
-rw-r--r-- runtime/onert/core/include/ir/operation/Conv2D.h | 1
-rw-r--r-- runtime/onert/core/include/ir/operation/Cos.h | 49
-rw-r--r-- runtime/onert/core/include/ir/operation/Dequantize.h | 49
-rw-r--r-- runtime/onert/core/include/ir/operation/Einsum.h | 2
-rw-r--r-- runtime/onert/core/include/ir/operation/ElementwiseActivation.h (renamed from runtime/onert/core/include/ir/operation/Div.h) | 37
-rw-r--r-- runtime/onert/core/include/ir/operation/ElementwiseBinary.h (renamed from runtime/onert/core/include/ir/operation/Mul.h) | 27
-rw-r--r-- runtime/onert/core/include/ir/operation/ElementwiseUnary.h (renamed from runtime/onert/core/include/ir/operation/MaxPool2D.h) | 48
-rw-r--r-- runtime/onert/core/include/ir/operation/Exp.h | 49
-rw-r--r-- runtime/onert/core/include/ir/operation/Floor.h | 51
-rw-r--r-- runtime/onert/core/include/ir/operation/Log.h | 49
-rw-r--r-- runtime/onert/core/include/ir/operation/LogicalAnd.h | 50
-rw-r--r-- runtime/onert/core/include/ir/operation/LogicalNot.h | 49
-rw-r--r-- runtime/onert/core/include/ir/operation/LogicalOr.h | 50
-rw-r--r-- runtime/onert/core/include/ir/operation/Logistic.h | 49
-rw-r--r-- runtime/onert/core/include/ir/operation/Max.h | 50
-rw-r--r-- runtime/onert/core/include/ir/operation/Mean.h | 61
-rw-r--r-- runtime/onert/core/include/ir/operation/Min.h | 50
-rw-r--r-- runtime/onert/core/include/ir/operation/Neg.h | 49
-rw-r--r-- runtime/onert/core/include/ir/operation/Pool2D.h (renamed from runtime/onert/core/include/ir/operation/AvgPool2D.h) | 26
-rw-r--r-- runtime/onert/core/include/ir/operation/Quantize.h | 49
-rw-r--r-- runtime/onert/core/include/ir/operation/RSQRT.h | 49
-rw-r--r-- runtime/onert/core/include/ir/operation/Rank.h (renamed from runtime/onert/core/include/ir/operation/Round.h) | 14
-rw-r--r-- runtime/onert/core/include/ir/operation/ReLU.h | 49
-rw-r--r-- runtime/onert/core/include/ir/operation/ReLU1.h | 49
-rw-r--r-- runtime/onert/core/include/ir/operation/ReLU6.h | 49
-rw-r--r-- runtime/onert/core/include/ir/operation/ResizeNearestNeighbor.h (renamed from runtime/onert/core/include/ir/operation/L2Pool2D.h) | 26
-rw-r--r-- runtime/onert/core/include/ir/operation/SQRT.h | 49
-rw-r--r-- runtime/onert/core/include/ir/operation/Select.h | 2
-rw-r--r-- runtime/onert/core/include/ir/operation/Sin.h | 49
-rw-r--r-- runtime/onert/core/include/ir/operation/Sub.h | 62
-rw-r--r-- runtime/onert/core/include/ir/operation/Tanh.h | 49
-rw-r--r-- runtime/onert/core/include/ir/operation/ZerosLike.h | 49
-rw-r--r-- runtime/onert/core/include/util/Config.lst | 2
-rw-r--r-- runtime/onert/core/include/util/Exceptions.h (renamed from runtime/onert/backend/cpu/ops/ExpLayer.h) | 45
-rw-r--r-- runtime/onert/core/include/util/ShapeInference.h | 14
-rw-r--r-- runtime/onert/core/src/backend/controlflow/Backend.h | 9
-rw-r--r-- runtime/onert/core/src/backend/controlflow/ConstantInitializer.h | 10
-rw-r--r-- runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc | 31
-rw-r--r-- runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h | 13
-rw-r--r-- runtime/onert/core/src/backend/controlflow/KernelGenerator.cc | 74
-rw-r--r-- runtime/onert/core/src/backend/controlflow/KernelGenerator.h | 18
-rw-r--r-- runtime/onert/core/src/backend/controlflow/Tensor.h (renamed from runtime/onert/core/src/ir/operation/Log.cc) | 24
-rw-r--r-- runtime/onert/core/src/backend/controlflow/TensorBuilder.cc | 52
-rw-r--r-- runtime/onert/core/src/backend/controlflow/TensorBuilder.h | 24
-rw-r--r-- runtime/onert/core/src/backend/controlflow/TensorRegistry.h | 134
-rw-r--r-- runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc | 6
-rw-r--r-- runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc | 12
-rw-r--r-- runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc | 8
-rw-r--r-- runtime/onert/core/src/compiler/Compiler.cc | 10
-rw-r--r-- runtime/onert/core/src/compiler/ExecutorFactory.cc | 98
-rw-r--r-- runtime/onert/core/src/compiler/ExecutorFactory.h | 27
-rw-r--r-- runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc | 2
-rw-r--r-- runtime/onert/core/src/compiler/Fp32ToFp16Converter.h | 6
-rw-r--r-- runtime/onert/core/src/compiler/HEScheduler.cc | 38
-rw-r--r-- runtime/onert/core/src/compiler/HEScheduler.h | 2
-rw-r--r-- runtime/onert/core/src/compiler/Linear.cc | 14
-rw-r--r-- runtime/onert/core/src/compiler/Linear.h | 8
-rw-r--r-- runtime/onert/core/src/compiler/LoweredGraph.cc (renamed from runtime/onert/core/src/ir/LoweredGraph.cc) | 157
-rw-r--r-- runtime/onert/core/src/compiler/ManualScheduler.cc | 7
-rw-r--r-- runtime/onert/core/src/compiler/OperationValidator.cc | 166
-rw-r--r-- runtime/onert/core/src/compiler/OperationValidator.h | 21
-rw-r--r-- runtime/onert/core/src/compiler/StaticShapeInference.cc | 406
-rw-r--r-- runtime/onert/core/src/compiler/TensorBuilders.h | 11
-rw-r--r-- runtime/onert/core/src/compiler/TensorRegistries.h | 91
-rw-r--r-- runtime/onert/core/src/compiler/pass/ConstantInsertionPass.cc (renamed from runtime/onert/core/src/ir/pass/ConstantInsertionPass.cc) | 12
-rw-r--r-- runtime/onert/core/src/compiler/pass/ConstantInsertionPass.h (renamed from runtime/onert/core/src/ir/pass/ConstantInsertionPass.h) | 21
-rw-r--r-- runtime/onert/core/src/compiler/pass/ConstantLoweringPass.cc (renamed from runtime/onert/core/src/ir/pass/ConstantLoweringPass.cc) | 12
-rw-r--r-- runtime/onert/core/src/compiler/pass/ConstantLoweringPass.h (renamed from runtime/onert/core/src/ir/pass/ConstantLoweringPass.h) | 12
-rw-r--r-- runtime/onert/core/src/compiler/pass/LoweredOperandPass.h (renamed from runtime/onert/core/src/ir/pass/LoweredOperandPass.h) | 12
-rw-r--r-- runtime/onert/core/src/compiler/pass/LoweredOperationPass.h (renamed from runtime/onert/core/src/ir/pass/LoweredOperationPass.h) | 12
-rw-r--r-- runtime/onert/core/src/compiler/pass/OperandPass.cc (renamed from runtime/onert/core/src/ir/pass/OperandPass.cc) | 6
-rw-r--r-- runtime/onert/core/src/compiler/pass/OperandPass.h (renamed from runtime/onert/core/src/ir/pass/OperandPass.h) | 12
-rw-r--r-- runtime/onert/core/src/compiler/pass/OperationPass.cc (renamed from runtime/onert/core/src/ir/pass/OperationPass.cc) | 6
-rw-r--r-- runtime/onert/core/src/compiler/pass/OperationPass.h (renamed from runtime/onert/core/src/ir/pass/OperationPass.h) | 12
-rw-r--r-- runtime/onert/core/src/compiler/pass/Pass.h (renamed from runtime/onert/core/src/ir/pass/Pass.h) | 16
-rw-r--r-- runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc (renamed from runtime/onert/core/src/ir/pass/PermutationEliminationPass.cc) | 32
-rw-r--r-- runtime/onert/core/src/compiler/pass/PermutationEliminationPass.h (renamed from runtime/onert/core/src/ir/pass/PermutationEliminationPass.h) | 16
-rw-r--r-- runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc (renamed from runtime/onert/core/src/ir/pass/PermutationInsertionPass.cc) | 30
-rw-r--r-- runtime/onert/core/src/compiler/pass/PermutationInsertionPass.h (renamed from runtime/onert/core/src/ir/pass/PermutationInsertionPass.h) | 20
-rw-r--r-- runtime/onert/core/src/compiler/pass/PermutationOperationPass.cc (renamed from runtime/onert/core/src/ir/pass/PermutationOperationPass.cc) | 76
-rw-r--r-- runtime/onert/core/src/compiler/pass/PermutationOperationPass.h | 64
-rw-r--r-- runtime/onert/core/src/dumper/dot/DotDumper.h | 6
-rw-r--r-- runtime/onert/core/src/exec/DataflowExecutor.cc | 8
-rw-r--r-- runtime/onert/core/src/exec/DataflowExecutor.h | 5
-rw-r--r-- runtime/onert/core/src/exec/DynamicShapeInference.cc | 113
-rw-r--r-- runtime/onert/core/src/exec/Execution.cc | 13
-rw-r--r-- runtime/onert/core/src/exec/ExecutorBase.cc | 81
-rw-r--r-- runtime/onert/core/src/exec/ExecutorBase.h | 12
-rw-r--r-- runtime/onert/core/src/exec/FunctionSequence.cc | 3
-rw-r--r-- runtime/onert/core/src/exec/LinearExecutor.h | 8
-rw-r--r-- runtime/onert/core/src/exec/ParallelExecutor.cc | 9
-rw-r--r-- runtime/onert/core/src/exec/ParallelExecutor.h | 5
-rw-r--r-- runtime/onert/core/src/exec/feature/nchw/Reader.h | 15
-rw-r--r-- runtime/onert/core/src/exec/feature/nchw/View.h | 88
-rw-r--r-- runtime/onert/core/src/exec/feature/nhwc/Reader.h | 15
-rw-r--r-- runtime/onert/core/src/exec/feature/nhwc/View.h | 88
-rw-r--r-- runtime/onert/core/src/interp/InterpOps.lst | 19
-rw-r--r-- runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc | 75
-rw-r--r-- runtime/onert/core/src/interp/operations/ElementwiseActivations.cc (renamed from runtime/onert/core/src/interp/operations/UnaryActivations.cc) | 82
-rw-r--r-- runtime/onert/core/src/interp/operations/Logistic.cc | 99
-rw-r--r-- runtime/onert/core/src/interp/operations/MaxPool2D.cc | 125
-rw-r--r-- runtime/onert/core/src/interp/operations/Pool2D.cc (renamed from runtime/onert/core/src/interp/operations/AvgPool2D.cc) | 86
-rw-r--r-- runtime/onert/core/src/interp/operations/Softmax.cc | 39
-rw-r--r-- runtime/onert/core/src/ir/Graph.cc | 20
-rw-r--r-- runtime/onert/core/src/ir/GraphIterator.cc | 2
-rw-r--r-- runtime/onert/core/src/ir/GraphIterator.h | 12
-rw-r--r-- runtime/onert/core/src/ir/OpSequences.cc | 16
-rw-r--r-- runtime/onert/core/src/ir/OperationDumper.cc | 590
-rw-r--r-- runtime/onert/core/src/ir/OperationDumper.h | 36
-rw-r--r-- runtime/onert/core/src/ir/Padding.cc | 20
-rw-r--r-- runtime/onert/core/src/ir/operation/Abs.cc | 39
-rw-r--r-- runtime/onert/core/src/ir/operation/Add.cc | 40
-rw-r--r-- runtime/onert/core/src/ir/operation/AvgPool2D.cc | 40
-rw-r--r-- runtime/onert/core/src/ir/operation/BinaryArithmetic.cc (renamed from runtime/onert/core/src/ir/operation/Quantize.cc) | 23
-rw-r--r-- runtime/onert/core/src/ir/operation/Cast.cc | 39
-rw-r--r-- runtime/onert/core/src/ir/operation/Dequantize.cc | 39
-rw-r--r-- runtime/onert/core/src/ir/operation/Div.cc | 40
-rw-r--r-- runtime/onert/core/src/ir/operation/ElementwiseActivation.cc | 72
-rw-r--r-- runtime/onert/core/src/ir/operation/ElementwiseBinary.cc | 52
-rw-r--r-- runtime/onert/core/src/ir/operation/ElementwiseUnary.cc | 65
-rw-r--r-- runtime/onert/core/src/ir/operation/Exp.cc | 39
-rw-r--r-- runtime/onert/core/src/ir/operation/Floor.cc | 39
-rw-r--r-- runtime/onert/core/src/ir/operation/L2Pool2D.cc | 40
-rw-r--r-- runtime/onert/core/src/ir/operation/LogicalAnd.cc | 39
-rw-r--r-- runtime/onert/core/src/ir/operation/LogicalNot.cc | 39
-rw-r--r-- runtime/onert/core/src/ir/operation/LogicalOr.cc | 39
-rw-r--r-- runtime/onert/core/src/ir/operation/Logistic.cc | 39
-rw-r--r-- runtime/onert/core/src/ir/operation/Max.cc | 39
-rw-r--r-- runtime/onert/core/src/ir/operation/MaxPool2D.cc | 40
-rw-r--r-- runtime/onert/core/src/ir/operation/Min.cc | 39
-rw-r--r-- runtime/onert/core/src/ir/operation/Mul.cc | 40
-rw-r--r-- runtime/onert/core/src/ir/operation/Neg.cc | 39
-rw-r--r-- runtime/onert/core/src/ir/operation/Pad.cc | 4
-rw-r--r-- runtime/onert/core/src/ir/operation/Pool2D.cc (renamed from runtime/onert/core/src/ir/operation/Sin.cc) | 20
-rw-r--r-- runtime/onert/core/src/ir/operation/RSQRT.cc | 39
-rw-r--r-- runtime/onert/core/src/ir/operation/Rank.cc (renamed from runtime/onert/core/src/ir/operation/Cos.cc) | 6
-rw-r--r-- runtime/onert/core/src/ir/operation/ReLU.cc | 39
-rw-r--r-- runtime/onert/core/src/ir/operation/ReLU1.cc | 39
-rw-r--r-- runtime/onert/core/src/ir/operation/ReLU6.cc | 39
-rw-r--r-- runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc (renamed from runtime/onert/core/src/ir/operation/Round.cc) | 10
-rw-r--r-- runtime/onert/core/src/ir/operation/SQRT.cc | 39
-rw-r--r-- runtime/onert/core/src/ir/operation/Sub.cc | 40
-rw-r--r-- runtime/onert/core/src/ir/operation/Tanh.cc | 39
-rw-r--r-- runtime/onert/core/src/ir/operation/ZerosLike.cc | 39
-rw-r--r-- runtime/onert/core/src/ir/pass/PermutationOperationPass.h | 70
-rw-r--r-- runtime/onert/core/src/util/EventRecorder.cc | 297
-rw-r--r-- runtime/onert/core/src/util/EventRecorder.h | 4
-rw-r--r-- runtime/onert/core/src/util/ShapeInference.cc | 54
-rw-r--r-- runtime/onert/frontend/base_loader/include/base_loader.h | 557
-rw-r--r-- runtime/onert/frontend/circle/src/circle_loader.cc | 6
-rw-r--r-- runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc | 696
-rw-r--r-- runtime/onert/frontend/tflite/src/tflite_loader.cc | 6
-rw-r--r-- runtime/onert/test/core/compiler/Scheduler.cc | 38
-rw-r--r-- runtime/onert/test/core/exec/ExecInstance.cc | 14
-rw-r--r-- runtime/onert/test/core/interp/ExecManager.cc | 26
-rw-r--r-- runtime/onert/test/util/ShapeInference.cc | 39
274 files changed, 5864 insertions, 11867 deletions
diff --git a/runtime/onert/api/include/nnfw.h b/runtime/onert/api/include/nnfw.h
index ef3678b0d..9348df6ae 100644
--- a/runtime/onert/api/include/nnfw.h
+++ b/runtime/onert/api/include/nnfw.h
@@ -103,6 +103,8 @@ typedef enum {
NNFW_STATUS_INVALID_STATE = 3,
/** When it is out of memory */
NNFW_STATUS_OUT_OF_MEMORY = 4,
+ /** When it was given an insufficient output buffer */
+ NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE = 5,
} NNFW_STATUS;
/**
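Note on the hunk above: the new NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE code lets a caller distinguish "the output buffer was too small" (typically after dynamic shape inference grows an output) from a generic failure. A minimal sketch of how a client might react, assuming a float output at index 0 and only the existing nnfw_run / nnfw_output_tensorinfo / nnfw_set_output calls from nnfw.h (error handling omitted, buffer sizes are hypothetical):

    size_t out_bytes = 1024 * sizeof(float);          /* hypothetical initial guess */
    float *out = (float *)malloc(out_bytes);
    nnfw_set_output(session, 0, NNFW_TYPE_TENSOR_FLOAT32, out, out_bytes);
    if (nnfw_run(session) == NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE)
    {
      nnfw_tensorinfo ti;
      nnfw_output_tensorinfo(session, 0, &ti);        /* shape known after inference */
      uint64_t elems = 1;
      for (int32_t i = 0; i < ti.rank; ++i)
        elems *= ti.dims[i];
      out_bytes = elems * sizeof(float);
      out = (float *)realloc(out, out_bytes);         /* grow the buffer and retry */
      nnfw_set_output(session, 0, NNFW_TYPE_TENSOR_FLOAT32, out, out_bytes);
      nnfw_run(session);
    }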
diff --git a/runtime/onert/api/include/nnfw_experimental.h b/runtime/onert/api/include/nnfw_experimental.h
index 4cd5c585a..94f781988 100644
--- a/runtime/onert/api/include/nnfw_experimental.h
+++ b/runtime/onert/api/include/nnfw_experimental.h
@@ -62,4 +62,38 @@ typedef struct
NNFW_STATUS nnfw_register_custom_op_info(nnfw_session *session, const char *id,
custom_kernel_registration_info *info);
+/**
+ * @brief Get the input tensor index by name
+ *
+ * This function finds an input tensor with the given name.
+ * If found, the tensor's index is written to the address that @c index points to, and
+ * @c NNFW_STATUS_NO_ERROR is returned. Otherwise, @c index is left unchanged and
+ * @c NNFW_STATUS_ERROR is returned.
+ *
+ * @note If two or more input tensors have the same name, the one with the lowest index is always
+ * returned.
+ *
+ * @param[in] session the session object
+ * @param[in] tensorname the name of the tensor to find, a null-terminated string
+ * @param[out] index the index to be returned
+ * @return @c NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_input_tensorindex(nnfw_session *session, const char *tensorname, uint32_t *index);
+
+/**
+ * @brief Get the output tensor index by name
+ *
+ * This function finds an output tensor with the given name.
+ * If found, the tensor's index is written to the address that @c index points to, and
+ * @c NNFW_STATUS_NO_ERROR is returned. Otherwise, @c index is left unchanged and
+ * @c NNFW_STATUS_ERROR is returned.
+ *
+ * @note If two or more output tensors have the same name, the one with the lowest index is always
+ * returned.
+ *
+ * @param[in] session the session object
+ * @param[in] tensorname the name of the tensor to find, a null-terminated string
+ * @param[out] index the index to be returned
+ * @return @c NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_output_tensorindex(nnfw_session *session, const char *tensorname, uint32_t *index);
+
#endif // __NNFW_EXPERIMENTAL_H__
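Usage sketch for the two lookup calls added above, assuming a loaded model that actually contains tensors with these names ("wav_input" and "scores" are hypothetical) and the existing nnfw_set_input from nnfw.h:

    uint32_t in_idx, out_idx;
    if (nnfw_input_tensorindex(session, "wav_input", &in_idx) != NNFW_STATUS_NO_ERROR ||
        nnfw_output_tensorindex(session, "scores", &out_idx) != NNFW_STATUS_NO_ERROR)
      return -1;                     /* no tensor with that name in the primary subgraph */
    nnfw_set_input(session, in_idx, NNFW_TYPE_TENSOR_FLOAT32, in_buf, in_bytes);

Resolving indices by name keeps client code independent of the I/O ordering of a particular exported model.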
diff --git a/runtime/onert/api/include/nnfw_version.h b/runtime/onert/api/include/nnfw_version.h
index 320271a26..42e43760b 100644
--- a/runtime/onert/api/include/nnfw_version.h
+++ b/runtime/onert/api/include/nnfw_version.h
@@ -21,6 +21,6 @@
* NNFW_VERSION is a uint32 value representing nnfw runtime version
* in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch
*/
-#define NNFW_VERSION 0x01000800
+#define NNFW_VERSION 0x01000900
#endif // __NNFW_VERSION_H__
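The comment above fixes the layout 0xMMmmmmPP, so the new value 0x01000900 decodes to major 1, minor 9, patch 0 (release 1.9.0), and the previous 0x01000800 was 1.8.0. A caller can recover the parts with plain shifts:

    uint32_t v = NNFW_VERSION;             /* 0x01000900 */
    unsigned major = (v >> 24) & 0xff;     /* 1 */
    unsigned minor = (v >> 8) & 0xffff;    /* 9 */
    unsigned patch = v & 0xff;             /* 0 */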
diff --git a/runtime/onert/api/src/nnfw_api.cc b/runtime/onert/api/src/nnfw_api.cc
index d65158fd8..ff5e679da 100644
--- a/runtime/onert/api/src/nnfw_api.cc
+++ b/runtime/onert/api/src/nnfw_api.cc
@@ -33,6 +33,7 @@ STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_ERROR, 1);
STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_UNEXPECTED_NULL, 2);
STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_INVALID_STATE, 3);
STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_OUT_OF_MEMORY, 4);
+STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE, 5);
STATIC_ASSERT_ENUM_CHECK(NNFW_LAYOUT_NONE, 0);
STATIC_ASSERT_ENUM_CHECK(NNFW_LAYOUT_CHANNELS_LAST, 1);
@@ -347,3 +348,15 @@ NNFW_STATUS nnfw_load_circle_from_buffer(nnfw_session *session, uint8_t *buffer,
NNFW_RETURN_ERROR_IF_NULL(session);
return session->load_circle_from_buffer(buffer, size);
}
+
+NNFW_STATUS nnfw_input_tensorindex(nnfw_session *session, const char *tensorname, uint32_t *index)
+{
+ NNFW_RETURN_ERROR_IF_NULL(session);
+ return session->input_tensorindex(tensorname, index);
+}
+
+NNFW_STATUS nnfw_output_tensorindex(nnfw_session *session, const char *tensorname, uint32_t *index)
+{
+ NNFW_RETURN_ERROR_IF_NULL(session);
+ return session->output_tensorindex(tensorname, index);
+}
diff --git a/runtime/onert/api/src/nnfw_api_internal.cc b/runtime/onert/api/src/nnfw_api_internal.cc
index eb0b743d3..81b40703f 100644
--- a/runtime/onert/api/src/nnfw_api_internal.cc
+++ b/runtime/onert/api/src/nnfw_api_internal.cc
@@ -18,6 +18,7 @@
#include "CustomKernelRegistry.h"
#include "compiler/Compiler.h"
#include "util/ConfigSource.h"
+#include "util/Exceptions.h"
#include "exec/Execution.h"
#include "circle_loader.h"
#include "tflite_loader.h"
@@ -37,6 +38,7 @@
#define MAX_BACKEND_NAME_LENGTH 32
#define MAX_OP_NAME_LENGTH 64
#define MAX_PATH_LENGTH 1024
+#define MAX_TENSOR_NAME_LENGTH 64
// Is null-terminating in length ?
static bool null_terminating(const char *str, uint32_t length)
@@ -64,6 +66,32 @@ static onert::ir::Layout convertLayout(NNFW_LAYOUT layout)
return onert::ir::Layout::UNKNOWN;
}
+NNFW_STATUS getTensorIndexImpl(const onert::ir::Graph &graph, const char *tensorname,
+ uint32_t *index, bool is_input)
+{
+ if (!tensorname || !index)
+ return NNFW_STATUS_UNEXPECTED_NULL;
+
+ if (!null_terminating(tensorname, MAX_TENSOR_NAME_LENGTH))
+ {
+ std::cerr << "tensor name is too long" << std::endl;
+ return NNFW_STATUS_ERROR;
+ }
+
+ auto ind_found = is_input ? graph.getInputIndex(tensorname) : graph.getOutputIndex(tensorname);
+
+ if (ind_found.undefined())
+ {
+ // Not found
+ return NNFW_STATUS_ERROR;
+ }
+ else
+ {
+ *index = ind_found.value();
+ return NNFW_STATUS_NO_ERROR;
+ }
+}
+
nnfw_session::nnfw_session()
: _subgraphs{nullptr}, _execution{nullptr},
_kernel_registry{std::make_shared<onert::frontend::custom::KernelRegistry>()}
@@ -213,6 +241,12 @@ NNFW_STATUS nnfw_session::run()
{
_execution->execute();
}
+ catch (const onert::InsufficientBufferSizeException &e)
+ {
+ // Currently insufficient buffer always means output buffer.
+ std::cerr << "Error during nnfw_session::run : " << e.what() << std::endl;
+ return NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE;
+ }
catch (const std::exception &e)
{
std::cerr << "Error during nnfw_session::run : " << e.what() << std::endl;
@@ -447,26 +481,27 @@ NNFW_STATUS nnfw_session::apply_tensorinfo(uint32_t index, nnfw_tensorinfo ti)
}
}
+ auto ind = primary_subgraph()->getInputs().at(index);
+ auto &input = primary_subgraph()->operands().at(ind);
+
+ onert::ir::Shape new_shape(ti.rank);
+ for (int32_t i = 0; i < ti.rank; i++)
+ new_shape.dim(i) = ti.dims[i];
+
+ // if passed shape is same with the shape of model, do nothing
+ if (input.info().shape() == new_shape)
+ return NNFW_STATUS_NO_ERROR;
+
if (!isStatePreparedOrFinishedRun())
{
// In this case, if we apply input shape in primary_subgraph, it will propagate after
compilation and execution
- auto ind = primary_subgraph()->getInputs().at(index);
- auto &input = primary_subgraph()->operands().at(ind);
-
- onert::ir::Shape new_shape(ti.rank);
- for (int32_t i = 0; i < ti.rank; i++)
- new_shape.dim(i) = ti.dims[i];
// overwrite input shape with the shape from ti
input.info().shape(new_shape);
}
else // when called after nnfw_session::prepare()
{
- onert::ir::Shape new_shape(ti.rank);
- for (int32_t i = 0; i < ti.rank; i++)
- new_shape.dim(i) = ti.dims[i];
-
_execution->changeInputShape(onert::ir::IOIndex(index), new_shape);
}
@@ -840,3 +875,13 @@ bool nnfw_session::isStatePreparedOrFinishedRun()
{
return isStatePrepared() || isStateFinishedRun();
}
+
+NNFW_STATUS nnfw_session::input_tensorindex(const char *tensorname, uint32_t *index)
+{
+ return getTensorIndexImpl(*primary_subgraph(), tensorname, index, true);
+}
+
+NNFW_STATUS nnfw_session::output_tensorindex(const char *tensorname, uint32_t *index)
+{
+ return getTensorIndexImpl(*primary_subgraph(), tensorname, index, false);
+}
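The reordered apply_tensorinfo above now builds the requested onert::ir::Shape once, returns early when it equals the model shape, and only then chooses between rewriting the graph input (before prepare) and Execution::changeInputShape (after prepare). A hedged sketch of driving it from the public API, assuming the nnfw_apply_tensorinfo entry point in nnfw.h forwards here and that input 0 is a float tensor being resized to a hypothetical 1x128 shape:

    nnfw_tensorinfo ti = {NNFW_TYPE_TENSOR_FLOAT32, 2, {1, 128}};
    nnfw_apply_tensorinfo(session, 0, ti);   /* no-op when 1x128 already matches the model */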
diff --git a/runtime/onert/api/src/nnfw_api_internal.h b/runtime/onert/api/src/nnfw_api_internal.h
index 1c3c3706f..604ba38b4 100644
--- a/runtime/onert/api/src/nnfw_api_internal.h
+++ b/runtime/onert/api/src/nnfw_api_internal.h
@@ -122,8 +122,6 @@ public:
NNFW_STATUS input_tensorinfo(uint32_t index, nnfw_tensorinfo *ti);
NNFW_STATUS output_tensorinfo(uint32_t index, nnfw_tensorinfo *ti);
- NNFW_STATUS register_custom_operation(const std::string &id, nnfw_custom_eval eval_func);
-
NNFW_STATUS set_available_backends(const char *backends);
NNFW_STATUS set_op_backend(const char *op, const char *backend);
@@ -133,9 +131,16 @@ public:
NNFW_STATUS set_config(const char *key, const char *value);
NNFW_STATUS get_config(const char *key, char *value, size_t value_size);
-
NNFW_STATUS load_circle_from_buffer(uint8_t *buffer, size_t size);
+ //
+ // Experimental API
+ //
+
+ NNFW_STATUS register_custom_operation(const std::string &id, nnfw_custom_eval eval_func);
+ NNFW_STATUS input_tensorindex(const char *tensorname, uint32_t *index);
+ NNFW_STATUS output_tensorindex(const char *tensorname, uint32_t *index);
+
private:
onert::ir::Graph *primary_subgraph();
bool isStateInitialized();
diff --git a/runtime/onert/backend/acl_cl/Backend.h b/runtime/onert/backend/acl_cl/Backend.h
index 8aaf516cd..5c5041378 100644
--- a/runtime/onert/backend/acl_cl/Backend.h
+++ b/runtime/onert/backend/acl_cl/Backend.h
@@ -25,6 +25,7 @@
#include "KernelGenerator.h"
#include "TensorManager.h"
#include "Optimizer.h"
+#include "AclTensorRegistry.h"
namespace onert
{
@@ -47,10 +48,13 @@ public:
const auto &operands = graph.operands();
const auto &operations = graph.operations();
auto context = std::make_unique<BackendContext>(this, &graph);
- auto tb = std::make_shared<TensorBuilder>(operands, createTensorManager(is_linear_executor));
+ auto tm = createTensorManager(is_linear_executor);
+ auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
+ auto tb = std::make_shared<TensorBuilder>(operands, tm, tr);
+ context->tensor_registry = tr;
context->tensor_builder = tb;
- context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb);
- context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb);
+ context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
+ context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr);
context->tensor_register = nullptr;
context->optimizer = std::make_shared<Optimizer>(context.get());
return context;
diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.cc b/runtime/onert/backend/acl_cl/ConstantInitializer.cc
index d7f5f8031..31f1c10eb 100644
--- a/runtime/onert/backend/acl_cl/ConstantInitializer.cc
+++ b/runtime/onert/backend/acl_cl/ConstantInitializer.cc
@@ -24,78 +24,17 @@ namespace acl_cl
{
ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
- : IConstantInitializer{operands}, _tensor_builder{tensor_builder}
+ const std::shared_ptr<ITensorRegistry> &tensor_reg)
+ : acl_common::AclConstantInitializer{operands, tensor_reg}
{
// DO NOTHING
}
-void ConstantInitializer::copyInputInitialize(const ir::Operation &node, uint32_t index)
-{
- assert(node.getInputs().size() > index);
-
- const auto &input_index = node.getInputs().at(index);
- const auto &input_obj = _operands.at(input_index);
- registerCopyInitializer(input_index, input_obj);
-}
-
-void ConstantInitializer::permuteInputInitialize(const ir::Operation &node, uint32_t index)
-{
- assert(node.getInputs().size() > index);
-
- const auto &input_index = node.getInputs().at(index);
- const auto &input_obj = _operands.at(input_index);
- registerPermuteInitializer(input_index, input_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::BatchToSpaceND &node)
-{
- const auto &block_size_index = node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE);
- const auto &block_size_obj = _operands.at(block_size_index);
-
- if (block_size_obj.isConstant())
- {
- _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) {
- assert(model_obj.data());
- const auto &shape = model_obj.shape();
- const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base());
- assert(model_obj.shape().rank() == 1);
- obj.access([&](ITensor &tensor) {
- for (size_t i = 0; i < shape.num_elements(); ++i)
- {
- const int32_t value = base[shape.num_elements() - i - 1];
- int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() +
- tensor.calcOffset({static_cast<int32_t>(i)}));
- *into = value;
- }
- });
- };
- }
-}
-
-void ConstantInitializer::visit(const ir::operation::Conv2D &node)
-{
- permuteInputInitialize(node, ir::operation::Conv2D::KERNEL);
- copyInputInitialize(node, ir::operation::Conv2D::BIAS);
-}
-
-void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node)
-{
- permuteInputInitialize(node, ir::operation::DepthwiseConv2D::KERNEL);
- copyInputInitialize(node, ir::operation::DepthwiseConv2D::BIAS);
-}
-
void ConstantInitializer::visit(const ir::operation::EmbeddingLookup &node)
{
copyInputInitialize(node, ir::operation::EmbeddingLookup::LOOKUPS);
}
-void ConstantInitializer::visit(const ir::operation::FullyConnected &node)
-{
- copyInputInitialize(node, ir::operation::FullyConnected::WEIGHT);
- copyInputInitialize(node, ir::operation::FullyConnected::BIAS);
-}
-
void ConstantInitializer::visit(const ir::operation::Gather &node)
{
copyInputInitialize(node, ir::operation::Gather::INDICES);
@@ -107,33 +46,6 @@ void ConstantInitializer::visit(const ir::operation::HashtableLookup &node)
copyInputInitialize(node, ir::operation::HashtableLookup::KEYS);
}
-void ConstantInitializer::visit(const ir::operation::LSTM &node)
-{
- copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::INPUT_GATE_BIAS);
- copyInputInitialize(node, ir::operation::LSTM::FORGET_GATE_BIAS);
- copyInputInitialize(node, ir::operation::LSTM::OUTPUT_GATE_BIAS);
- copyInputInitialize(node, ir::operation::LSTM::PROJECTION_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::PROJECTION_BIAS);
-}
-
-void ConstantInitializer::visit(const ir::operation::RNN &node)
-{
- copyInputInitialize(node, ir::operation::RNN::WEIGHTS);
- copyInputInitialize(node, ir::operation::RNN::RECURRENT_WEIGHTS);
- copyInputInitialize(node, ir::operation::RNN::BIAS);
-}
-
void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
{
const auto &block_size_index = node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE);
@@ -184,13 +96,6 @@ void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
}
}
-void ConstantInitializer::visit(const ir::operation::TransposeConv &node)
-{
- const auto &kernel_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL);
- const auto &kernel_obj = _operands.at(kernel_index);
- registerPermuteInitializer(kernel_index, kernel_obj);
-}
-
} // namespace acl_cl
} // namespace backend
} // namespace onert
diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.h b/runtime/onert/backend/acl_cl/ConstantInitializer.h
index c51f72b11..4f894fd31 100644
--- a/runtime/onert/backend/acl_cl/ConstantInitializer.h
+++ b/runtime/onert/backend/acl_cl/ConstantInitializer.h
@@ -17,9 +17,7 @@
#ifndef __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
#define __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
-#include <backend/IConstantInitializer.h>
-#include <ir/Operands.h>
-#include "TensorBuilder.h"
+#include "AclConstantInitializer.h"
namespace onert
{
@@ -28,32 +26,18 @@ namespace backend
namespace acl_cl
{
-class ConstantInitializer : public IConstantInitializer
+class ConstantInitializer : public acl_common::AclConstantInitializer
{
public:
ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder);
+ const std::shared_ptr<ITensorRegistry> &tensor_reg);
public:
- void visit(const ir::operation::BatchToSpaceND &) override;
- void visit(const ir::operation::Conv2D &) override;
- void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::EmbeddingLookup &) override;
- void visit(const ir::operation::FullyConnected &) override;
- void visit(const ir::operation::Gather &) override;
- void visit(const ir::operation::HashtableLookup &) override;
- void visit(const ir::operation::LSTM &) override;
- void visit(const ir::operation::RNN &) override;
- void visit(const ir::operation::SpaceToBatchND &) override;
- void visit(const ir::operation::TransposeConv &) override;
-
-private:
- std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
- void copyInputInitialize(const ir::Operation &node, uint32_t index);
- void permuteInputInitialize(const ir::Operation &node, uint32_t index);
-
-private:
- std::shared_ptr<TensorBuilder> _tensor_builder;
+ using acl_common::AclConstantInitializer::visit;
+ void visit(const ir::operation::EmbeddingLookup &) final;
+ void visit(const ir::operation::Gather &) final;
+ void visit(const ir::operation::HashtableLookup &) final;
+ void visit(const ir::operation::SpaceToBatchND &) final;
};
} // namespace acl_cl
diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.cc b/runtime/onert/backend/acl_cl/KernelGenerator.cc
index a84f983b4..94489253d 100644
--- a/runtime/onert/backend/acl_cl/KernelGenerator.cc
+++ b/runtime/onert/backend/acl_cl/KernelGenerator.cc
@@ -40,15 +40,16 @@ namespace backend
namespace acl_cl
{
-using ::onert::backend::acl_common::asAclClFunction;
+using ::onert::backend::acl_common::asAclFunction;
using ActivationBuilder = ::onert::backend::acl_common::AclActivationBuilder<
- ::arm_compute::ICLTensor, ::arm_compute::CLActivationLayer, acl_common::AclClFunction>;
+ ::arm_compute::ICLTensor, ::arm_compute::CLActivationLayer, acl_common::AclFunction>;
-KernelGenerator::KernelGenerator(const ir::Operands &operands_ctx,
- const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
+KernelGenerator::KernelGenerator(
+ const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
+ const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
: _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder),
- _current_op_seq_layout(ir::Layout::UNKNOWN)
+ _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN)
{
// DO NOTHING
}
@@ -77,51 +78,69 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
const auto block_size_index{
node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto block_size_tensor = _tensor_builder->at(block_size_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
assert(_ctx.at(block_size_index).data());
- auto fn = std::make_unique<::arm_compute::CLBatchToSpaceLayer>();
+ auto fn = acl_common::generateLayer<arm_compute::CLBatchToSpaceLayer>(
+ ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
- fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::Cast &node)
+void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
+ const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ const auto activation = node.param().activation;
- std::unique_ptr<::arm_compute::IFunction> fn;
- if (ifm_tensor->data_type() == ofm_tensor->data_type())
- {
- auto l = std::make_unique<::arm_compute::CLCopy>();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
- l->configure(ifm_tensor->handle(), ofm_tensor->handle());
+ const auto act_info = acl_common::asActivationLayerInfo(activation);
- fn = std::move(l);
- }
- else
+ std::unique_ptr<arm_compute::IFunction> fn;
+ switch (node.param().arithmetic_type)
{
- auto l = std::make_unique<::arm_compute::CLCast>();
-
- // TODO Support converting float to int32 as round down
- l->configure(ifm_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
-
- fn = std::move(l);
+ case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLArithmeticAddition>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+ arm_compute::ConvertPolicy::SATURATE, act_info);
+ break;
+ }
+ case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLArithmeticSubtraction>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+ arm_compute::ConvertPolicy::SATURATE, act_info);
+ break;
+ }
+ case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLPixelWiseMultiplication>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
+ arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN,
+ act_info);
+ break;
+ }
+ case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLArithmeticDivision>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), act_info);
+ break;
+ }
+ default:
+ assert(false && "The BinaryArithmetic operation supports only binary arithmetic operations");
+ break;
}
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Conv2D &node)
@@ -145,22 +164,20 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
ker_width, ker_height);
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto ker_tensor = _tensor_builder->at(ker_index).get();
- auto bias_tensor = _tensor_builder->at(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
- auto fn = std::make_unique<::arm_compute::CLConvolutionLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
- ofm_tensor->handle(), conv_info, ::arm_compute::WeightsInfo(),
- ::arm_compute::Size2D(1U, 1U), act_info);
+ auto fn = acl_common::generateLayer<arm_compute::CLConvolutionLayer>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
+ ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info,
+ ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
- _return_fn = asAclClFunction(std::move(fn));
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
@@ -185,50 +202,23 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
const auto multiplier = node.param().multiplier;
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto ker_tensor = _tensor_builder->at(ker_index).get();
- auto bias_tensor = _tensor_builder->at(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
{
- auto fn = std::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>();
-
- fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
- ofm_tensor->handle(), conv_info, multiplier, act_info);
+ auto fn = acl_common::generateLayer<arm_compute::CLDepthwiseConvolutionLayer>(
+ ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(),
+ conv_info, multiplier, act_info);
- _return_fn = asAclClFunction(std::move(fn));
+ _return_fn = asAclFunction(std::move(fn));
}
}
-void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
-{
- auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
- node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::MAX);
-
- const auto ofm_index{node.getOutputs().at(0)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- const auto activation = node.param().activation;
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(raw_fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
-{
- auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
- node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::AVG);
-
- const auto ofm_index{node.getOutputs().at(0)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- const auto activation = node.param().activation;
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(raw_fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
void KernelGenerator::visit(const ir::operation::Concat &node)
{
const auto ofm_index{node.getOutputs().at(0)};
@@ -250,70 +240,44 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
return;
}
- auto output_tensor = _tensor_builder->at(ofm_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(ofm_index).get();
std::vector<::arm_compute::ICLTensor *> input_tensors;
for (auto &ifm_ind : input_indexes)
- input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle());
+ input_tensors.emplace_back(_tensor_reg->getAclTensor(ifm_ind)->handle());
std::unique_ptr<::arm_compute::IFunction> fn;
if (input_indexes.size() < 2)
{
- auto l = std::make_unique<::arm_compute::CLCopy>();
- l->configure(input_tensors.at(0), output_tensor->handle());
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::CLCopy>(input_tensors.at(0),
+ output_tensor->handle());
}
else
{
- auto l = std::make_unique<::arm_compute::CLConcatenateLayer>();
const auto rank = _ctx.at(ofm_index).shape().rank();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = output_tensor->layout();
const auto fixed_axis =
acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
- l->configure(input_tensors, output_tensor->handle(), fixed_axis);
- fn = std::move(l);
+ fn = acl_common::generateLayer<::arm_compute::CLConcatenateLayer>(
+ input_tensors, output_tensor->handle(), fixed_axis);
}
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::FullyConnected &node)
{
const auto output_index{node.getOutputs().at(0)};
- auto output_tensor = _tensor_builder->at(output_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
const auto activation = node.param().activation;
- auto fn = acl_common::kernelGenFullyConnected<acl_common::AclClFunction, ::arm_compute::ICLTensor,
+ auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ICLTensor,
::arm_compute::CLFullyConnectedReshapingLayer>(
- node, _ctx, _tensor_builder, _current_op_seq_layout);
+ node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout);
_return_fn = std::make_unique<exec::FunctionSequence>(
std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
}
-void KernelGenerator::visit(const ir::operation::Mul &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLPixelWiseMultiplication>();
-
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
- arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
-
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
void KernelGenerator::visit(const ir::operation::Reduce &node)
{
const auto output_index{node.getOutputs().at(0)};
@@ -322,8 +286,8 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
const auto keep_dims{node.param().keep_dims};
const auto reduce_type = node.param().reduce_type;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
// Convert to ACL axes taking into account negative values and possible duplicates.
const auto &axes = _ctx.at(axes_index);
@@ -334,29 +298,21 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
std::unique_ptr<arm_compute::IFunction> fn;
if (reduce_type == ir::operation::Reduce::ReduceType::MEAN)
{
- auto l = std::make_unique<::arm_compute::CLReduceMean>();
-
const auto acl_axes =
acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout);
- l->configure(input_tensor->handle(), acl_axes, keep_dims, output_tensor->handle());
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::CLReduceMean>(input_tensor->handle(), acl_axes,
+ keep_dims, output_tensor->handle());
}
else
{
- auto l = std::make_unique<::arm_compute::CLReduceOperation>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
const auto acl_axes = acl_common::asSet(axes, input_rank, frontend_layout, backend_layout);
- l->configure(input_tensor->handle(), output_tensor->handle(), acl_axes, keep_dims,
- acl_common::convertReduceType(reduce_type));
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::CLReduceOperation>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ output_tensor->handle(), acl_axes, keep_dims, acl_common::convertReduceType(reduce_type));
}
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Reshape &node)
@@ -364,8 +320,8 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
// NOTE This operation must not change the layout from frontend to backend
// So, PermutationOperationPass makes layouts of frontend and backend the same.
@@ -376,13 +332,10 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
UNUSED_RELEASE(frontend_layout);
UNUSED_RELEASE(backend_layout);
- auto fn = std::make_unique<::arm_compute::CLReshapeLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
+ output_tensor->handle());
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Squeeze &node)
@@ -398,32 +351,11 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node)
(void)dims;
(void)ndim;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
- auto fn = std::make_unique<arm_compute::CLReshapeLayer>();
- fn->configure(input_tensor->handle(), output_tensor->handle());
- auto acl_fn = asAclClFunction(std::move(fn));
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Tanh &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<arm_compute::CLActivationLayer>();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Softmax &node)
@@ -433,17 +365,14 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
const auto beta = node.param().beta;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLSoftmaxLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), beta);
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLSoftmaxLayer>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ output_tensor->handle(), beta);
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Slice &node)
@@ -453,8 +382,8 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
- auto outputData_tensor = _tensor_builder->at(output_index).get();
- auto inputData_tensor = _tensor_builder->at(input_index).get();
+ auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = inputData_tensor->layout();
@@ -506,13 +435,10 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
ends_set.set(i, ends[i]);
}
- auto fn = std::make_unique<::arm_compute::CLSlice>();
-
- fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
-
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLSlice>(
+ inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::StridedSlice &node)
@@ -523,8 +449,8 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
- auto outputData_tensor = _tensor_builder->at(output_index).get();
- auto inputData_tensor = _tensor_builder->at(input_index).get();
+ auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = inputData_tensor->layout();
@@ -597,14 +523,11 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
strides_set.set(i, strides[i]);
}
- auto fn = std::make_unique<::arm_compute::CLStridedSlice>();
-
- fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set,
- strides_set, begin_mask, end_mask, shrink_axis_mask);
-
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLStridedSlice>(
+ inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
+ begin_mask, end_mask, shrink_axis_mask);
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Transpose &node)
@@ -615,8 +538,8 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
const auto rank = _ctx.at(ifm_idx).shape().rank();
- auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
- auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = ifm_tensor->layout();
@@ -625,93 +548,168 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
auto backend_pv = ::onert::backend::acl_common::getARMComputePermutationVector(
rank, pv, frontend_layout, backend_layout);
- auto fn = std::make_unique<::arm_compute::CLPermute>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), backend_pv);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Add &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLArithmeticAddition>();
-
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
- arm_compute::ConvertPolicy::SATURATE);
+ auto fn = acl_common::generateLayer<::arm_compute::CLPermute>(ifm_tensor->handle(),
+ ofm_tensor->handle(), backend_pv);
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ _return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::Sub &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
-
- const auto activation = node.param().activation;
+ const auto ifm_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto fn = std::make_unique<::arm_compute::CLArithmeticSubtraction>();
+ const ::arm_compute::ActivationLayerInfo act_info = acl_common::asActivationLayerInfo(
+ node.param().op_type, node.param().alpha, node.param().beta);
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
- arm_compute::ConvertPolicy::SATURATE);
+ auto fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), act_info);
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ _return_fn = asAclFunction(std::move(fn));
}
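+// The single ElementwiseActivation visitor above covers the removed Tanh,
+// Logistic, ReLU, ReLU1 and ReLU6 visitors; asActivationLayerInfo is expected
+// to rebuild the same ActivationLayerInfo from op_type/alpha/beta
+// (e.g. BOUNDED_RELU with alpha = 6.0f for ReLU6).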
-void KernelGenerator::visit(const ir::operation::Div &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
+ const auto output_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
- auto fn = std::make_unique<::arm_compute::CLArithmeticDivision>();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+ std::unique_ptr<arm_compute::IFunction> fn;
+ switch (node.param().op_type)
+ {
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLBinaryLogicalOp>(
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle(),
+ arm_compute::BinaryLogicalOperation::AND);
+ break;
+ }
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLBitwiseOr>(
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLElementwiseMax>(
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLElementwiseMin>(
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ break;
+ }
+ default:
+ {
+ std::string err_msg("acl_cl KernelGenerator : " + node.name() +
+                          " is not an elementwise-binary operation");
+ assert(false && err_msg.c_str());
+ break;
+ }
+ }
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ _return_fn = asAclFunction(std::move(fn));
}
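+// The ElementwiseBinary visitor above folds the removed LogicalAnd, LogicalOr,
+// Max and Min visitors into one switch; each case reuses the CL layer the
+// dedicated visitor used before (CLBinaryLogicalOp, CLBitwiseOr,
+// CLElementwiseMax, CLElementwiseMin).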
-void KernelGenerator::visit(const ir::operation::Exp &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
+ const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
- auto fn = std::make_unique<::arm_compute::CLExpLayer>();
+ std::unique_ptr<arm_compute::IFunction> fn;
+ switch (node.param().op_type)
+ {
+ case ir::operation::ElementwiseUnary::Type::ABS:
+ {
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
- fn->configure(input_tensor->handle(), output_tensor->handle());
+ fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
+ input_tensor->handle(), output_tensor->handle(), act_info);
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::CAST:
+ {
+ if (input_tensor->data_type() == output_tensor->data_type())
+ {
+ fn = acl_common::generateLayer<arm_compute::CLCopy>(input_tensor->handle(),
+ output_tensor->handle());
+ }
+ else
+ {
+ // TODO Support converting float to int32 as round down
+ fn = acl_common::generateLayer<arm_compute::CLCast>(
+ input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
+ }
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::DEQUANTIZE:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLDequantizationLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::EXP:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLExpLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::FLOOR:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLFloor>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLBitwiseNot>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::NEG:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLNeg>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::RSQRT:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLRsqrtLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::SQRT:
+ {
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
- auto acl_fn = asAclClFunction(std::move(fn));
+ fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
+ input_tensor->handle(), output_tensor->handle(), act_info);
+ break;
+ }
+ default:
+ {
+      throw std::runtime_error("acl_cl KernelGenerator : " + node.name() + " is not supported yet");
+ break;
+ }
+ }
+
+ auto acl_fn = asAclFunction(std::move(fn));
_return_fn = std::move(acl_fn);
}
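+// Likewise, the ElementwiseUnary switch above absorbs the removed Abs, Cast,
+// Dequantize, Exp, Floor, LogicalNot, Neg, RSQRT and SQRT visitors, keeping
+// the same CL layer (or CLActivationLayer configuration) each of them used.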
@@ -721,16 +719,13 @@ void KernelGenerator::visit(const ir::operation::ExpandDims &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLReshapeLayer>();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
- fn->configure(input_tensor->handle(), output_tensor->handle());
+ auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
+ output_tensor->handle());
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
@@ -740,67 +735,25 @@ void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto gamma_tensor = _tensor_builder->at(gamma_index).get();
- auto beta_tensor = _tensor_builder->at(beta_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto gamma_tensor = _tensor_reg->getAclTensor(gamma_index).get();
+ auto beta_tensor = _tensor_reg->getAclTensor(beta_index).get();
auto epsilon = node.param().epsilon;
auto activation = node.param().activation;
- auto fn = std::make_unique<::arm_compute::CLInstanceNormalizationLayerEx>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(),
- beta_tensor->handle(), epsilon);
+ auto fn = acl_common::generateLayer<arm_compute::CLInstanceNormalizationLayerEx>(
+ ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), beta_tensor->handle(),
+ epsilon);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::Logistic &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
-
- auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalAnd &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)};
- const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input0_tensor = _tensor_builder->at(input0_index).get();
- auto input1_tensor = _tensor_builder->at(input1_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLBinaryLogicalOp>();
-
- fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
- ::arm_compute::BinaryLogicalOperation::AND);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::LSTM &node)
{
- _return_fn = acl_common::kernelGenLSTM<acl_common::AclClFunction, ::arm_compute::ICLTensor,
- ::arm_compute::CLLSTMLayer>(node, _ctx, _tensor_builder);
+ _return_fn = acl_common::kernelGenLSTM<acl_common::AclFunction, ::arm_compute::ICLTensor,
+ ::arm_compute::CLLSTMLayer>(node, _ctx, _tensor_reg);
}
void KernelGenerator::visit(const ir::operation::Comparison &node)
@@ -811,18 +764,15 @@ void KernelGenerator::visit(const ir::operation::Comparison &node)
const auto comparison_type = node.param().comparison_type;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input0_tensor = _tensor_builder->at(input0_index).get();
- auto input1_tensor = _tensor_builder->at(input1_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLComparison>();
-
- fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
- (arm_compute::ComparisonOperation)comparison_type);
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input0_tensor = _tensor_reg->getAclTensor(input0_index).get();
+ auto input1_tensor = _tensor_reg->getAclTensor(input1_index).get();
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLComparison>(
+ input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
+ (arm_compute::ComparisonOperation)comparison_type);
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Pack &node)
@@ -836,26 +786,24 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
for (const auto &input_index : node.getInputs())
input_indexes.emplace_back(input_index);
- auto output = _tensor_builder->at(output_index).get()->handle();
+ auto output = _tensor_reg->getAclTensor(output_index).get()->handle();
std::vector<arm_compute::ICLTensor *> inputs;
for (const auto &input_index : input_indexes)
- inputs.emplace_back(_tensor_builder->at(input_index)->handle());
+ inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle());
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_builder->at(output_index).get()->layout();
+ const auto backend_layout = _tensor_reg->getAclTensor(output_index).get()->layout();
if (axis < 0)
axis += output_rank;
axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();
- auto fn = std::make_unique<::arm_compute::CLStackLayer>();
-
// Disable applied dim_correction
std::vector<arm_compute::TensorShape> orig_inputs_acl_tensor_shapes;
for (const auto &input_index : input_indexes)
{
size_t input_rank = _ctx.at(input_index).shape().rank();
- const auto &input_tensor = _tensor_builder->at(input_index);
+ const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
orig_inputs_acl_tensor_shapes.emplace_back(input_tensor->info()->tensor_shape());
assert(input_rank == input_tensor->num_dimensions());
if (input_rank != input_tensor->info()->num_dimensions())
@@ -866,7 +814,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
}
}
- fn->configure(inputs, axis, output);
+ auto fn = acl_common::generateLayer<arm_compute::CLStackLayer>(inputs, axis, output);
// Revert disabling applied dim_correction
assert(inputs.size() == orig_inputs_acl_tensor_shapes.size());
@@ -875,7 +823,21 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
inputs.at(i)->info()->set_tensor_shape(orig_inputs_acl_tensor_shapes.at(i));
}
- _return_fn = asAclClFunction(std::move(fn));
+ _return_fn = asAclFunction(std::move(fn));
+}
+
+void KernelGenerator::visit(const ir::operation::Pool2D &node)
+{
+ auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
+ node, _ctx, _tensor_reg, _current_op_seq_layout,
+ acl_common::convertPoolType(node.param().op_type));
+
+ const auto ofm_index{node.getOutputs().at(0)};
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ const auto activation = node.param().activation;
+ _return_fn = std::make_unique<exec::FunctionSequence>(
+ asAclFunction(std::move(raw_fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
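+// The Pool2D visitor above replaces the removed MaxPool2D, AvgPool2D and
+// L2Pool2D visitors: kernelGenPool2D builds the CLPoolingLayer for the pool
+// type given by convertPoolType, and ActivationBuilder::generate appends the
+// fused activation (presumably as a separate function in the sequence, as the
+// removed per-op visitors did).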
void KernelGenerator::visit(const ir::operation::Permute &node)
@@ -883,8 +845,8 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
const auto ofm_idx{node.getOutputs().at(0)};
const auto ifm_idx{node.getInputs().at(0)};
const auto permute_type = node.getPermuteType();
- auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
- auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
const auto rank = _ctx.at(ofm_idx).shape().rank();
assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank());
@@ -895,70 +857,23 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
// WHCN -> CWHN
pv = arm_compute::PermutationVector{2, 0, 1};
- auto l = std::make_unique<::arm_compute::CLPermute>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::CLPermute>(ifm_tensor->handle(),
+ ofm_tensor->handle(), pv);
}
else if (permute_type == ir::operation::Permute::Type::NHWC_TO_NCHW && rank == 4)
{
// CWHN -> WHCN
pv = arm_compute::PermutationVector{1, 2, 0};
- auto l = std::make_unique<::arm_compute::CLPermute>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<::arm_compute::CLPermute>(ifm_tensor->handle(),
+ ofm_tensor->handle(), pv);
}
else
{
- auto l = std::make_unique<::arm_compute::CLCopy>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::CLCopy>(ifm_tensor->handle(), ofm_tensor->handle());
}
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::RSQRT &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLRsqrtLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
- _return_fn = asAclClFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<arm_compute::CLActivationLayer>();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
@@ -967,58 +882,32 @@ void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto fn = std::make_unique<::arm_compute::CLScale>();
+ auto fn = acl_common::generateLayer<arm_compute::CLScale>(
+ ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::InterpolationPolicy::BILINEAR,
+ ::arm_compute::BorderMode::REPLICATE, ::arm_compute::PixelValue(0.f),
+ ::arm_compute::SamplingPolicy::TOP_LEFT);
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(),
- ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
- ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU1 &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
-
- auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::ReLU6 &node)
+void KernelGenerator::visit(const ir::operation::ResizeNearestNeighbor &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f};
+ const auto ifm_index{node.getInputs().at(ir::operation::ResizeNearestNeighbor::Input::INPUT)};
- auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
+ auto fn = acl_common::generateLayer<arm_compute::CLScale>(
+ ifm_tensor->handle(), ofm_tensor->handle(),
+ ::arm_compute::InterpolationPolicy::NEAREST_NEIGHBOR, ::arm_compute::BorderMode::REPLICATE,
+ ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::RNN &node)
@@ -1036,43 +925,25 @@ void KernelGenerator::visit(const ir::operation::RNN &node)
const auto activation = node.param().activation;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto hidden_state_out_tensor = _tensor_builder->at(hidden_state_out_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto hidden_state_out_tensor = _tensor_reg->getAclTensor(hidden_state_out_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
- auto weights_tensor = _tensor_builder->at(weights_index).get();
- auto recurrent_weights_tensor = _tensor_builder->at(recurrent_weights_index).get();
- auto bias_tensor = _tensor_builder->at(bias_index).get();
- auto hidden_state_in_tensor = _tensor_builder->at(hidden_state_in_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto weights_tensor = _tensor_reg->getAclTensor(weights_index).get();
+ auto recurrent_weights_tensor = _tensor_reg->getAclTensor(recurrent_weights_index).get();
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
+ auto hidden_state_in_tensor = _tensor_reg->getAclTensor(hidden_state_in_index).get();
auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
- auto copy_layer = std::make_unique<::arm_compute::CLCopy>();
- copy_layer->configure(hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
- _return_fn = asAclClFunction(std::move(copy_layer));
+ auto copy_layer = acl_common::generateLayer<arm_compute::CLCopy>(
+ hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
+ _return_fn = asAclFunction(std::move(copy_layer));
- auto fn = std::make_unique<::arm_compute::CLRNNLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
- fn->configure(input_tensor->handle(), weights_tensor->handle(),
- recurrent_weights_tensor->handle(), bias_tensor->handle(),
- hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
- _return_fn = asAclClFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Floor &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLFloor>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ auto fn = acl_common::generateLayer<arm_compute::CLRNNLayer>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
+ hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
@@ -1083,24 +954,19 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto block_size_tensor = _tensor_builder->at(block_size_index).get();
- auto paddings_tensor = _tensor_builder->at(paddings_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
+ auto paddings_tensor = _tensor_reg->getAclTensor(paddings_index).get();
assert(_ctx.at(block_size_index).data());
assert(_ctx.at(paddings_index).data());
- std::unique_ptr<::arm_compute::IFunction> fn;
-
- auto l = std::make_unique<::arm_compute::CLSpaceToBatchLayer>();
- l->configure(ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
- ofm_tensor->handle());
- fn = std::move(l);
-
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLSpaceToBatchLayer>(
+ ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
+ ofm_tensor->handle());
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
@@ -1110,29 +976,13 @@ void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
auto block_size = node.param().block_size;
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLSpaceToDepthLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), block_size);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
-void KernelGenerator::visit(const ir::operation::L2Pool2D &node)
-{
- auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
- node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::L2);
+ auto fn = acl_common::generateLayer<arm_compute::CLSpaceToDepthLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), block_size);
- const auto ofm_index{node.getOutputs().at(0)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- const auto activation = node.param().activation;
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(raw_fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
@@ -1141,17 +991,14 @@ void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto lookups_tensor = _tensor_builder->at(lookups_index).get();
- auto values_tensor = _tensor_builder->at(values_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLEmbeddingLookup>();
-
- fn->configure(values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
+ auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLEmbeddingLookup>(
+ values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::L2Normalization &node)
@@ -1173,19 +1020,16 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node)
float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
float bias = 0.0f; // Don't offset the reduction.
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
radius, alpha, beta, bias, false);
- auto fn = std::make_unique<::arm_compute::CLNormalizationLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
-
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLNormalizationLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
@@ -1197,21 +1041,18 @@ void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto hits_tensor = _tensor_builder->at(hits_index).get();
-
- auto lookups_tensor = _tensor_builder->at(lookups_index).get();
- auto keys_tensor = _tensor_builder->at(keys_index).get();
- auto values_tensor = _tensor_builder->at(values_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLHashtableLookup>();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto hits_tensor = _tensor_reg->getAclTensor(hits_index).get();
- fn->configure(lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
- output_tensor->handle(), hits_tensor->handle());
+ auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
+ auto keys_tensor = _tensor_reg->getAclTensor(keys_index).get();
+ auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLHashtableLookup>(
+ lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
+ output_tensor->handle(), hits_tensor->handle());
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::PReLU &node)
@@ -1220,17 +1061,14 @@ void KernelGenerator::visit(const ir::operation::PReLU &node)
const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto alpha_tensor = _tensor_builder->at(alpha_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLPReluLayer>();
-
- fn->configure(ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index).get();
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLPReluLayer>(
+ ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::TransposeConv &node)
@@ -1258,77 +1096,18 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node)
invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
}
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto ker_tensor = _tensor_builder->at(ker_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
- auto fn = std::make_unique<::arm_compute::CLTransposeConvLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- fn->configure(ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(),
- tconv_info, invalid_horizontal, invalid_vertical);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::SQRT &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
-
- auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalOr &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)};
- const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input0_tensor = _tensor_builder->at(input0_index).get();
- auto input1_tensor = _tensor_builder->at(input1_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLBitwiseOr>();
-
- fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalNot &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLBitwiseNot>();
+ auto fn = acl_common::generateLayer<arm_compute::CLTransposeConvLayer>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
+ ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info, invalid_horizontal,
+ invalid_vertical);
- fn->configure(input_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
@@ -1337,17 +1116,14 @@ void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
- auto fn = std::make_unique<::arm_compute::CLElementwiseSquaredDiff>();
+ auto fn = acl_common::generateLayer<arm_compute::CLElementwiseSquaredDiff>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::TopKV2 &node)
@@ -1364,17 +1140,14 @@ void KernelGenerator::visit(const ir::operation::TopKV2 &node)
const auto k = node.param().k;
- auto values_tensor = _tensor_builder->at(outputValues_index).get();
- auto indices_tensor = _tensor_builder->at(outputIndices_index).get();
- auto input_tensor = _tensor_builder->at(inputData_index).get();
+ auto values_tensor = _tensor_reg->getAclTensor(outputValues_index).get();
+ auto indices_tensor = _tensor_reg->getAclTensor(outputIndices_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(inputData_index).get();
- auto fn = std::make_unique<::arm_compute::CLTopKV2>();
+ auto fn = acl_common::generateLayer<arm_compute::CLTopKV2>(
+ input_tensor->handle(), k, values_tensor->handle(), indices_tensor->handle());
- fn->configure(input_tensor->handle(), k, values_tensor->handle(), indices_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Gather &node)
@@ -1389,9 +1162,9 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
const auto axis_value = (axis_raw < 0 ? (ifm_rank + axis_raw) : axis_raw);
const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto indices_tensor = _tensor_builder->at(indices_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto indices_tensor = _tensor_reg->getAclTensor(indices_index).get();
// NOTE The frontend layout and backend layout must be the same for this operation.
// If not the same, we have to add a stage(?) to perform permutation of output tensor. It
@@ -1407,8 +1180,6 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
assert(backend_layout == indices_tensor->layout());
assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout);
- auto fn = std::make_unique<::arm_compute::CLGatherEx>();
-
// input is n-D, indices k-D, output is (n + k - 1)-D
size_t n = ifm_rank;
assert(n == ifm_tensor->num_dimensions());
@@ -1433,52 +1204,14 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
acl_common::asTensorShape(indices.shape(), _current_op_seq_layout, backend_layout, false));
}
- fn->configure(ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
+ auto fn = acl_common::generateLayer<arm_compute::CLGatherEx>(
+ ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
// Revert disabling applied dim_correction
ifm_tensor->info()->set_tensor_shape(orig_ifm_acl_tensor_shape);
indices_tensor->info()->set_tensor_shape(orig_indice_acl_tensor_shape);
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Neg &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLNeg>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Abs &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
-
- auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
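+// As in the Pack, Unpack and Pad visitors, Gather temporarily overrides the
+// ACL tensor shapes (with dim_correction disabled) while the layer is
+// configured and then restores the original shapes afterwards.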
void KernelGenerator::visit(const ir::operation::ArgMax &node)
@@ -1491,8 +1224,8 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
assert((ifm_shape.rank() - 1) == ofm_shape.rank());
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
auto frontend_layout = _current_op_seq_layout;
auto backend_layout = ifm_tensor->layout();
@@ -1506,31 +1239,11 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
auto acl_axis =
acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
- auto fn = std::make_unique<::arm_compute::CLArgMinMaxLayer>();
+ auto fn = acl_common::generateLayer<arm_compute::CLArgMinMaxLayer>(
+ ifm_tensor->handle(), acl_axis, ofm_tensor->handle(),
+ ::arm_compute::ReductionOperation::ARG_IDX_MAX);
- fn->configure(ifm_tensor->handle(), acl_axis, ofm_tensor->handle(),
- ::arm_compute::ReductionOperation::ARG_IDX_MAX);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Dequantize &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLDequantizationLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node)
@@ -1544,19 +1257,16 @@ void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &nod
auto beta = node.param().beta;
auto bias = node.param().bias;
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
const auto norm_info = ::arm_compute::NormalizationLayerInfo(
::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
- auto fn = std::make_unique<::arm_compute::CLNormalizationLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
-
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLNormalizationLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
@@ -1567,16 +1277,13 @@ void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
auto block_size = node.param().block_size;
assert(block_size > 0);
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLDepthToSpaceLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), block_size);
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLDepthToSpaceLayer>(
+ input_tensor->handle(), output_tensor->handle(), block_size);
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Split &node)
@@ -1590,10 +1297,10 @@ void KernelGenerator::visit(const ir::operation::Split &node)
for (const auto &output : node.getOutputs())
output_indexes.emplace_back(output);
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
std::vector<arm_compute::ICLTensor *> output_tensors;
for (const auto &ofm_ind : output_indexes)
- output_tensors.emplace_back(_tensor_builder->at(ofm_ind).get()->handle());
+ output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind).get()->handle());
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = ifm_tensor->layout();
@@ -1602,11 +1309,10 @@ void KernelGenerator::visit(const ir::operation::Split &node)
axis += ifm_rank;
axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value();
- auto fn = std::make_unique<::arm_compute::CLSplit>();
-
- fn->configure(ifm_tensor->handle(), output_tensors, axis);
+ auto fn =
+ acl_common::generateLayer<arm_compute::CLSplit>(ifm_tensor->handle(), output_tensors, axis);
- _return_fn = asAclClFunction(std::move(fn));
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Unpack &node)
@@ -1620,13 +1326,13 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
for (const auto &output_index : node.getOutputs())
output_indexes.emplace_back(output_index);
- auto input = _tensor_builder->at(input_index).get()->handle();
+ auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
std::vector<arm_compute::ICLTensor *> outputs;
for (const auto &output_index : output_indexes)
- outputs.emplace_back(_tensor_builder->at(output_index)->handle());
+ outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle());
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
+ const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
if (axis < 0)
axis += input_rank;
axis = acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value();
@@ -1636,7 +1342,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
for (const auto &output_index : output_indexes)
{
size_t output_rank = _ctx.at(output_index).shape().rank();
- const auto &output_tensor = _tensor_builder->at(output_index);
+ const auto &output_tensor = _tensor_reg->getAclTensor(output_index);
orig_outputs_acl_tensor_shapes.emplace_back(output_tensor->info()->tensor_shape());
assert(output_rank == output_tensor->num_dimensions());
if (output_rank != output_tensor->info()->num_dimensions())
@@ -1647,11 +1353,9 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
}
}
- auto fn = std::make_unique<::arm_compute::CLUnstack>();
-
- fn->configure(input, outputs, axis);
+ auto fn = acl_common::generateLayer<arm_compute::CLUnstack>(input, outputs, axis);
- _return_fn = asAclClFunction(std::move(fn));
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Pad &node)
@@ -1669,11 +1373,11 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
auto quant_info = ::arm_compute::QuantizationInfo(input_type.scale(), input_type.offset());
const auto pixel_value = ::arm_compute::PixelValue(0, data_type, quant_info);
- auto input = _tensor_builder->at(input_index).get()->handle();
- auto output = _tensor_builder->at(output_index).get()->handle();
+ auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
+ auto output = _tensor_reg->getAclTensor(output_index).get()->handle();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
+ const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
::arm_compute::PaddingList padding_list;
padding_list.resize(rank);
@@ -1685,11 +1389,10 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
}
- auto fn = std::make_unique<::arm_compute::CLPadLayer>();
// Disable applied dim_correction
size_t input_rank = _ctx.at(input_index).shape().rank();
- const auto &input_tensor = _tensor_builder->at(input_index);
+ const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
assert(input_rank == input_tensor->num_dimensions());
if (input_rank != input_tensor->info()->num_dimensions())
{
@@ -1698,50 +1401,13 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
_ctx.at(input_index).shape(), frontend_layout, backend_layout, false));
}
- fn->configure(input, output, padding_list, pixel_value);
+ auto fn =
+ acl_common::generateLayer<arm_compute::CLPadLayer>(input, output, padding_list, pixel_value);
  // Do not revert disabling applied dim_correction; CLPadKernel has a CL kernel for 4 dimensions.
  // Reverting it would produce a mismatch of results.
- _return_fn = asAclClFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Min &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLElementwiseMin>();
-
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Max &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLElementwiseMax>();
-
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::ConvertFp32ToFp16 &node)
@@ -1749,17 +1415,13 @@ void KernelGenerator::visit(const ir::operation::ConvertFp32ToFp16 &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp32ToFp16::Input::INPUT)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLDepthConvertLayer>();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE,
- 0);
+ auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0);
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::ConvertFp16ToFp32 &node)
@@ -1767,17 +1429,13 @@ void KernelGenerator::visit(const ir::operation::ConvertFp16ToFp32 &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp16ToFp32::Input::INPUT)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLDepthConvertLayer>();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE,
- 0);
+ auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0);
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
} // namespace acl_cl
diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.h b/runtime/onert/backend/acl_cl/KernelGenerator.h
index 1e3b06489..d188d6d83 100644
--- a/runtime/onert/backend/acl_cl/KernelGenerator.h
+++ b/runtime/onert/backend/acl_cl/KernelGenerator.h
@@ -21,6 +21,8 @@
#include "ir/Operands.h"
#include "TensorBuilder.h"
+#include "AclTensorRegistry.h"
+#include "TensorManager.h"
namespace onert
{
@@ -33,70 +35,52 @@ class KernelGenerator : public IKernelGenerator
{
public:
KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder);
+ const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg);
void visit(const ir::OpSequence &) override;
void visit(const ir::operation::BatchToSpaceND &) override;
+ void visit(const ir::operation::BinaryArithmetic &) override;
void visit(const ir::operation::Conv2D &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::MaxPool2D &) override;
- void visit(const ir::operation::AvgPool2D &) override;
void visit(const ir::operation::Concat &) override;
void visit(const ir::operation::FullyConnected &) override;
- void visit(const ir::operation::Mul &) override;
void visit(const ir::operation::Reduce &) override;
void visit(const ir::operation::Reshape &) override;
void visit(const ir::operation::Squeeze &) override;
- void visit(const ir::operation::Tanh &) override;
void visit(const ir::operation::Softmax &) override;
void visit(const ir::operation::Slice &) override;
void visit(const ir::operation::StridedSlice &) override;
void visit(const ir::operation::Transpose &) override;
- void visit(const ir::operation::Add &) override;
- void visit(const ir::operation::Sub &) override;
- void visit(const ir::operation::Cast &) override;
- void visit(const ir::operation::Div &) override;
- void visit(const ir::operation::Exp &) override;
+ void visit(const ir::operation::ElementwiseActivation &) override;
+ void visit(const ir::operation::ElementwiseBinary &) override;
+ void visit(const ir::operation::ElementwiseUnary &) override;
void visit(const ir::operation::ExpandDims &) override;
void visit(const ir::operation::InstanceNorm &) override;
- void visit(const ir::operation::Logistic &) override;
void visit(const ir::operation::Comparison &) override;
- void visit(const ir::operation::LogicalAnd &) override;
void visit(const ir::operation::LSTM &) override;
void visit(const ir::operation::Pack &) override;
+ void visit(const ir::operation::Pool2D &) override;
void visit(const ir::operation::Permute &) override;
- void visit(const ir::operation::RSQRT &) override;
- void visit(const ir::operation::ReLU &) override;
void visit(const ir::operation::ResizeBilinear &) override;
- void visit(const ir::operation::ReLU1 &) override;
- void visit(const ir::operation::ReLU6 &) override;
+ void visit(const ir::operation::ResizeNearestNeighbor &) override;
void visit(const ir::operation::RNN &) override;
- void visit(const ir::operation::Floor &) override;
void visit(const ir::operation::SpaceToBatchND &) override;
void visit(const ir::operation::SpaceToDepth &) override;
- void visit(const ir::operation::L2Pool2D &) override;
void visit(const ir::operation::EmbeddingLookup &) override;
void visit(const ir::operation::L2Normalization &) override;
void visit(const ir::operation::HashtableLookup &) override;
void visit(const ir::operation::PReLU &) override;
void visit(const ir::operation::TransposeConv &) override;
- void visit(const ir::operation::SQRT &) override;
- void visit(const ir::operation::LogicalOr &) override;
- void visit(const ir::operation::LogicalNot &) override;
void visit(const ir::operation::SquaredDifference &) override;
void visit(const ir::operation::TopKV2 &) override;
void visit(const ir::operation::Gather &) override;
- void visit(const ir::operation::Neg &) override;
- void visit(const ir::operation::Abs &) override;
void visit(const ir::operation::ArgMax &) override;
- void visit(const ir::operation::Dequantize &) override;
void visit(const ir::operation::LocalResponseNormalization &) override;
void visit(const ir::operation::DepthToSpace &) override;
void visit(const ir::operation::Split &) override;
void visit(const ir::operation::Unpack &) override;
void visit(const ir::operation::Pad &) override;
- void visit(const ir::operation::Min &) override;
- void visit(const ir::operation::Max &) override;
void visit(const ir::operation::ConvertFp32ToFp16 &) override;
void visit(const ir::operation::ConvertFp16ToFp32 &) override;
@@ -104,6 +88,7 @@ private:
const ir::Operands &_ctx;
const ir::Operations &_operations_ctx;
std::shared_ptr<TensorBuilder> _tensor_builder;
+ std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg;
ir::Layout _current_op_seq_layout;
};
diff --git a/runtime/onert/backend/acl_cl/Optimizer.cc b/runtime/onert/backend/acl_cl/Optimizer.cc
index 6ba3143e8..9134d3fb8 100644
--- a/runtime/onert/backend/acl_cl/Optimizer.cc
+++ b/runtime/onert/backend/acl_cl/Optimizer.cc
@@ -19,7 +19,7 @@
#include "ParentInfo.h"
#include <cassert>
-#include <ir/LoweredGraph.h>
+#include <compiler/LoweredGraph.h>
#include <util/logging.h>
#include "AclSubTensorAnalyzer.h"
diff --git a/runtime/onert/backend/acl_cl/TensorManager.h b/runtime/onert/backend/acl_cl/TensorManager.h
index bdbd0364e..ab295dbec 100644
--- a/runtime/onert/backend/acl_cl/TensorManager.h
+++ b/runtime/onert/backend/acl_cl/TensorManager.h
@@ -56,7 +56,7 @@ using InternalBufferManager = acl_common::AclInternalBufferManager<
using TensorManager =
acl_common::AclTensorManager<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>;
-TensorManager *createTensorManager(bool is_linear_executor)
+inline TensorManager *createTensorManager(bool is_linear_executor)
{
if (is_linear_executor)
{
diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.cc b/runtime/onert/backend/acl_common/AclConstantInitializer.cc
new file mode 100644
index 000000000..6ad5b7b69
--- /dev/null
+++ b/runtime/onert/backend/acl_common/AclConstantInitializer.cc
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "AclConstantInitializer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+AclConstantInitializer::AclConstantInitializer(const ir::Operands &operands,
+ const std::shared_ptr<ITensorRegistry> &tensor_reg)
+ : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
+{
+ // DO NOTHING
+}
+
+void AclConstantInitializer::copyInputInitialize(const ir::Operation &node, uint32_t index)
+{
+ assert(node.getInputs().size() > index);
+
+ const auto &input_index = node.getInputs().at(index);
+ const auto &input_obj = _operands.at(input_index);
+ registerCopyInitializer(input_index, input_obj);
+}
+
+void AclConstantInitializer::permuteInputInitialize(const ir::Operation &node, uint32_t index)
+{
+ assert(node.getInputs().size() > index);
+
+ const auto &input_index = node.getInputs().at(index);
+ const auto &input_obj = _operands.at(input_index);
+ registerPermuteInitializer(input_index, input_obj);
+}
+
+void AclConstantInitializer::visit(const ir::operation::BatchToSpaceND &node)
+{
+ const auto &block_size_index = node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE);
+ const auto &block_size_obj = _operands.at(block_size_index);
+
+ if (block_size_obj.isConstant())
+ {
+ _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) {
+ assert(model_obj.data());
+ const auto &shape = model_obj.shape();
+ const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base());
+ assert(model_obj.shape().rank() == 1);
+ obj.access([&](ITensor &tensor) {
+ for (size_t i = 0; i < shape.num_elements(); ++i)
+ {
+ const int32_t value = base[shape.num_elements() - i - 1];
+ int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() +
+ tensor.calcOffset({static_cast<int32_t>(i)}));
+ *into = value;
+ }
+ });
+ };
+ }
+}
+
+void AclConstantInitializer::visit(const ir::operation::Conv2D &node)
+{
+ permuteInputInitialize(node, ir::operation::Conv2D::KERNEL);
+ copyInputInitialize(node, ir::operation::Conv2D::BIAS);
+}
+
+void AclConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node)
+{
+ permuteInputInitialize(node, ir::operation::DepthwiseConv2D::KERNEL);
+ copyInputInitialize(node, ir::operation::DepthwiseConv2D::BIAS);
+}
+
+void AclConstantInitializer::visit(const ir::operation::FullyConnected &node)
+{
+ copyInputInitialize(node, ir::operation::FullyConnected::WEIGHT);
+ copyInputInitialize(node, ir::operation::FullyConnected::BIAS);
+}
+
+void AclConstantInitializer::visit(const ir::operation::LSTM &node)
+{
+ copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::INPUT_GATE_BIAS);
+ copyInputInitialize(node, ir::operation::LSTM::FORGET_GATE_BIAS);
+ copyInputInitialize(node, ir::operation::LSTM::OUTPUT_GATE_BIAS);
+ copyInputInitialize(node, ir::operation::LSTM::PROJECTION_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::PROJECTION_BIAS);
+}
+
+void AclConstantInitializer::visit(const ir::operation::RNN &node)
+{
+ copyInputInitialize(node, ir::operation::RNN::WEIGHTS);
+ copyInputInitialize(node, ir::operation::RNN::RECURRENT_WEIGHTS);
+ copyInputInitialize(node, ir::operation::RNN::BIAS);
+}
+
+void AclConstantInitializer::visit(const ir::operation::TransposeConv &node)
+{
+ permuteInputInitialize(node, ir::operation::TransposeConv::KERNEL);
+}
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.h b/runtime/onert/backend/acl_common/AclConstantInitializer.h
new file mode 100644
index 000000000..52f4c54cf
--- /dev/null
+++ b/runtime/onert/backend/acl_common/AclConstantInitializer.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__
+#define __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__
+
+#include <backend/IConstantInitializer.h>
+#include <ir/Operands.h>
+#include "AclTensorRegistry.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+class AclConstantInitializer : public IConstantInitializer
+{
+public:
+ AclConstantInitializer(const ir::Operands &operands,
+ const std::shared_ptr<ITensorRegistry> &tensor_reg);
+
+public:
+ void visit(const ir::operation::BatchToSpaceND &) override;
+ void visit(const ir::operation::Conv2D &) override;
+ void visit(const ir::operation::DepthwiseConv2D &) override;
+ void visit(const ir::operation::FullyConnected &) override;
+ void visit(const ir::operation::LSTM &) override;
+ void visit(const ir::operation::RNN &) override;
+ void visit(const ir::operation::TransposeConv &) override;
+
+protected:
+ void copyInputInitialize(const ir::Operation &node, uint32_t index);
+ void permuteInputInitialize(const ir::Operation &node, uint32_t index);
+
+private:
+ std::shared_ptr<ITensorRegistry> tensor_registry() const final { return _tensor_reg; }
+
+protected:
+ std::shared_ptr<ITensorRegistry> _tensor_reg;
+};
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__
diff --git a/runtime/onert/backend/acl_common/AclFunction.h b/runtime/onert/backend/acl_common/AclFunction.h
index 85b18e847..94b65863a 100644
--- a/runtime/onert/backend/acl_common/AclFunction.h
+++ b/runtime/onert/backend/acl_common/AclFunction.h
@@ -47,12 +47,6 @@ private:
std::unique_ptr<::arm_compute::IFunction> _func;
};
-class AclClFunction : public AclFunction
-{
-public:
- using AclFunction::AclFunction;
-};
-
} // namespace acl_common
} // namespace backend
} // namespace onert
diff --git a/runtime/onert/backend/acl_common/AclKernelGen.h b/runtime/onert/backend/acl_common/AclKernelGen.h
index 9f7ce3764..372ce689e 100644
--- a/runtime/onert/backend/acl_common/AclKernelGen.h
+++ b/runtime/onert/backend/acl_common/AclKernelGen.h
@@ -30,11 +30,32 @@ namespace backend
namespace acl_common
{
+template <typename Layer, typename... Args>
+std::unique_ptr<arm_compute::IFunction> generateLayer(Args &&... args)
+{
+ auto l = std::make_unique<Layer>();
+
+ l->configure(std::forward<Args>(args)...);
+
+ return l;
+}
+
+template <typename Layer, typename... Args>
+std::unique_ptr<arm_compute::IFunction>
+generateLayer(std::shared_ptr<arm_compute::IMemoryManager> memory_manager, Args &&... args)
+{
+ auto l = std::make_unique<Layer>(memory_manager);
+
+ l->configure(std::forward<Args>(args)...);
+
+ return l;
+}
+
template <typename T_FunctionWrapper, typename T_Tensor, typename T_ACLLayer,
- typename T_TensorBuilder>
-std::unique_ptr<exec::IFunction>
-kernelGenLSTM(const ir::operation::LSTM &node, const ir::Operands &operands,
- const std::shared_ptr<T_TensorBuilder> &tensor_builder)
+ typename T_TensorRegistry>
+std::unique_ptr<exec::IFunction> kernelGenLSTM(const ir::operation::LSTM &node,
+ const ir::Operands &operands,
+ const std::shared_ptr<T_TensorRegistry> &tensor_reg)
{
// TODO Support dynamic rnn
// TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection.
@@ -117,43 +138,44 @@ kernelGenLSTM(const ir::operation::LSTM &node, const ir::Operands &operands,
const auto projection_clip = projection_threshold;
assert(cell_clip >= 0.f && projection_clip >= 0.f);
- auto scratch_buffer_tensor = tensor_builder->at(scratch_buffer_index).get();
- auto output_state_out_tensor = tensor_builder->at(output_state_out_index).get();
- auto cell_state_out_tensor = tensor_builder->at(cell_state_out_index).get();
- auto output_tensor = tensor_builder->at(output_index).get();
+ auto scratch_buffer_tensor = tensor_reg->getAclTensor(scratch_buffer_index).get();
+ auto output_state_out_tensor = tensor_reg->getAclTensor(output_state_out_index).get();
+ auto cell_state_out_tensor = tensor_reg->getAclTensor(cell_state_out_index).get();
+ auto output_tensor = tensor_reg->getAclTensor(output_index).get();
- auto input_tensor = tensor_builder->at(input_index).get();
+ auto input_tensor = tensor_reg->getAclTensor(input_index).get();
- auto input_to_forget_weights_tensor = tensor_builder->at(input_to_forget_weights_index).get();
- auto input_to_cell_weights_tensor = tensor_builder->at(input_to_cell_weights_index).get();
- auto input_to_output_weights_tensor = tensor_builder->at(input_to_output_weights_index).get();
+ auto input_to_forget_weights_tensor =
+ tensor_reg->getAclTensor(input_to_forget_weights_index).get();
+ auto input_to_cell_weights_tensor = tensor_reg->getAclTensor(input_to_cell_weights_index).get();
+ auto input_to_output_weights_tensor =
+ tensor_reg->getAclTensor(input_to_output_weights_index).get();
auto recurrent_to_forget_weights_tensor =
- tensor_builder->at(recurrent_to_forget_weights_index).get();
- auto recurrent_to_cell_weights_tensor = tensor_builder->at(recurrent_to_cell_weights_index).get();
+ tensor_reg->getAclTensor(recurrent_to_forget_weights_index).get();
+ auto recurrent_to_cell_weights_tensor =
+ tensor_reg->getAclTensor(recurrent_to_cell_weights_index).get();
auto recurrent_to_output_weights_tensor =
- tensor_builder->at(recurrent_to_output_weights_index).get();
+ tensor_reg->getAclTensor(recurrent_to_output_weights_index).get();
- auto forget_gate_bias_tensor = tensor_builder->at(forget_gate_bias_index).get();
- auto cell_bias_tensor = tensor_builder->at(cell_bias_index).get();
- auto output_gate_bias_tensor = tensor_builder->at(output_gate_bias_index).get();
- auto output_state_in_tensor = tensor_builder->at(output_state_in_index).get();
- auto cell_state_in_tensor = tensor_builder->at(cell_state_in_index).get();
+ auto forget_gate_bias_tensor = tensor_reg->getAclTensor(forget_gate_bias_index).get();
+ auto cell_bias_tensor = tensor_reg->getAclTensor(cell_bias_index).get();
+ auto output_gate_bias_tensor = tensor_reg->getAclTensor(output_gate_bias_index).get();
+ auto output_state_in_tensor = tensor_reg->getAclTensor(output_state_in_index).get();
+ auto cell_state_in_tensor = tensor_reg->getAclTensor(cell_state_in_index).get();
- auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
-
- auto fn = std::make_unique<T_ACLLayer>();
+ auto act_info = asActivationLayerInfo(activation);
::arm_compute::LSTMParams<T_Tensor> lstm_params{};
if (has_cifg_param)
{
auto input_to_input_weights_tensor =
- tensor_builder->at(input_to_input_weights_index).get(); // optional
+ tensor_reg->getAclTensor(input_to_input_weights_index).get(); // optional
auto recurrent_to_input_weights_tensor =
- tensor_builder->at(recurrent_to_input_weights_index).get(); // optional
+ tensor_reg->getAclTensor(recurrent_to_input_weights_index).get(); // optional
auto cell_to_input_weights_handle =
- has_peephole_param ? tensor_builder->at(cell_to_input_weights_index).get()->handle()
+ has_peephole_param ? tensor_reg->getAclTensor(cell_to_input_weights_index).get()->handle()
: nullptr; // optional (non-cifg && peephole)
- auto input_gate_bias_tensor = tensor_builder->at(input_gate_bias_index).get(); // optional
+ auto input_gate_bias_tensor = tensor_reg->getAclTensor(input_gate_bias_index).get(); // optional
lstm_params.set_cifg_params(input_to_input_weights_tensor->handle(),
recurrent_to_input_weights_tensor->handle(),
cell_to_input_weights_handle, input_gate_bias_tensor->handle());
@@ -161,40 +183,42 @@ kernelGenLSTM(const ir::operation::LSTM &node, const ir::Operands &operands,
if (has_peephole_param)
{
auto cell_to_forget_weights_tensor =
- tensor_builder->at(cell_to_forget_weights_index).get(); // optional
+ tensor_reg->getAclTensor(cell_to_forget_weights_index).get(); // optional
auto cell_to_output_weights_tensor =
- tensor_builder->at(cell_to_output_weights_index).get(); // optional
+ tensor_reg->getAclTensor(cell_to_output_weights_index).get(); // optional
lstm_params.set_peephole_params(cell_to_forget_weights_tensor->handle(),
cell_to_output_weights_tensor->handle());
}
if (has_projection_param)
{
- auto projection_weights_tensor = tensor_builder->at(projection_weights_index).get(); // optional
- auto projection_bias_handle = has_projection_bias
- ? tensor_builder->at(projection_bias_index).get()->handle()
- : nullptr; // optional
+ auto projection_weights_tensor =
+ tensor_reg->getAclTensor(projection_weights_index).get(); // optional
+ auto projection_bias_handle =
+ has_projection_bias ? tensor_reg->getAclTensor(projection_bias_index).get()->handle()
+ : nullptr; // optional
lstm_params.set_projection_params(projection_weights_tensor->handle(), projection_bias_handle);
}
- fn->configure(input_tensor->handle(), input_to_forget_weights_tensor->handle(),
- input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(),
- recurrent_to_forget_weights_tensor->handle(),
- recurrent_to_cell_weights_tensor->handle(),
- recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(),
- cell_bias_tensor->handle(), output_gate_bias_tensor->handle(),
- output_state_in_tensor->handle(), cell_state_in_tensor->handle(),
- scratch_buffer_tensor->handle(), output_state_out_tensor->handle(),
- cell_state_out_tensor->handle(), output_tensor->handle(), lstm_params, act_info,
- cell_clip, projection_clip);
+ auto fn = generateLayer<T_ACLLayer>(
+ input_tensor->handle(), input_to_forget_weights_tensor->handle(),
+ input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(),
+ recurrent_to_forget_weights_tensor->handle(), recurrent_to_cell_weights_tensor->handle(),
+ recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(),
+ cell_bias_tensor->handle(), output_gate_bias_tensor->handle(),
+ output_state_in_tensor->handle(), cell_state_in_tensor->handle(),
+ scratch_buffer_tensor->handle(), output_state_out_tensor->handle(),
+ cell_state_out_tensor->handle(), output_tensor->handle(), lstm_params, act_info, cell_clip,
+ projection_clip);
return std::make_unique<T_FunctionWrapper>(std::move(fn));
}
template <typename T_FunctionWrapper, typename T_Tensor, typename T_ACLLayer,
- typename T_TensorBuilder>
+ typename T_TensorBuilder, typename T_TensorRegistry>
std::unique_ptr<exec::IFunction>
kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Operands &operands,
- const std::shared_ptr<T_TensorBuilder> &tensor_builder, ir::Layout layout)
+ const std::shared_ptr<T_TensorBuilder> &tensor_builder,
+ const std::shared_ptr<T_TensorRegistry> &tensor_reg, ir::Layout layout)
{
using ir::operation::FullyConnected;
@@ -236,16 +260,13 @@ kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Ope
reshape.dim(1) = input_size; /* W */
}
- auto output_tensor = tensor_builder->at(output_index).get();
- const auto input_tensor = tensor_builder->at(input_index).get();
- const auto weight_tensor = tensor_builder->at(weight_index).get();
- const auto bias_tensor = tensor_builder->at(bias_index).get();
+ auto output_tensor = tensor_reg->getAclTensor(output_index).get();
+ const auto input_tensor = tensor_reg->getAclTensor(input_index).get();
+ const auto weight_tensor = tensor_reg->getAclTensor(weight_index).get();
+ const auto bias_tensor = tensor_reg->getAclTensor(bias_index).get();
const auto frontend_layout = layout;
const auto acl_layout = output_tensor->handle()->info()->data_layout();
- auto fn =
- std::make_unique<T_ACLLayer>(tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
typename T_ACLLayer::KernelType kernel_type = T_ACLLayer::KernelType::GENERAL;
if (operands.at(weight_index).isConstant())
{
@@ -253,20 +274,18 @@ kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Ope
assert(operands.at(weight_index).data());
}
- fn->configure(
- input_tensor->handle(), weight_tensor->handle(), bias_tensor->handle(),
- output_tensor->handle(), needs_reshape,
- ::onert::backend::acl_common::asTensorShape(
- reshape, frontend_layout, ::onert::backend::acl_common::asRuntimeLayout(acl_layout)),
- kernel_type);
+ auto fn = generateLayer<T_ACLLayer>(
+ tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ weight_tensor->handle(), bias_tensor->handle(), output_tensor->handle(), needs_reshape,
+ asTensorShape(reshape, frontend_layout, asRuntimeLayout(acl_layout)), kernel_type);
return std::make_unique<T_FunctionWrapper>(std::move(fn));
}
-template <typename T_ACLLayer, typename T_PoolOp, typename T_TensorBuilder>
+template <typename T_ACLLayer, typename T_PoolOp, typename T_AclTensorRegistry>
std::unique_ptr<::arm_compute::IFunction>
kernelGenPool2D(const T_PoolOp &node, const ir::Operands &operands,
- const std::shared_ptr<T_TensorBuilder> &tensor_builder, ir::Layout layout,
+ const std::shared_ptr<T_AclTensorRegistry> &tensor_reg, ir::Layout layout,
::arm_compute::PoolingType pooling_type)
{
const auto ofm_index{node.getOutputs().at(0)};
@@ -294,16 +313,14 @@ kernelGenPool2D(const T_PoolOp &node, const ir::Operands &operands,
VERBOSE(Pool2DParam) << "PAD(L): " << padding.left << std::endl;
VERBOSE(Pool2DParam) << "PAD(R): " << padding.right << std::endl;
- auto ofm_tensor = tensor_builder->at(ofm_index).get();
- auto ifm_tensor = tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = tensor_reg->getAclTensor(ifm_index).get();
::arm_compute::PoolingLayerInfo info{
pooling_type, ::arm_compute::Size2D{kw, kh}, ifm_tensor->info()->data_layout(),
- acl_common::asPadStrideInfo(padding, stride), true /* exclude_padding */};
-
- auto fn = std::make_unique<T_ACLLayer>();
+ asPadStrideInfo(padding, stride), true /* exclude_padding */};
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), info);
+ auto fn = generateLayer<T_ACLLayer>(ifm_tensor->handle(), ofm_tensor->handle(), info);
return fn;
}
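A minimal sketch of the pattern this patch converges on (illustrative only, mirroring the calls used throughout the diff): the new generateLayer helper replaces the former two-step construct-then-configure sequence, and an optional first argument forwards an ACL memory manager to the layer constructor.

// Before: construct, then configure (hypothetical fragment using names from the CLPad visitor above).
auto l = std::make_unique<arm_compute::CLPadLayer>();
l->configure(input, output, padding_list, pixel_value);

// After: generateLayer constructs the layer, forwards its arguments to configure(),
// and returns it as std::unique_ptr<arm_compute::IFunction>.
auto fn =
    acl_common::generateLayer<arm_compute::CLPadLayer>(input, output, padding_list, pixel_value);
_return_fn = asAclFunction(std::move(fn));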
diff --git a/runtime/onert/backend/acl_common/AclTensorBuilder.h b/runtime/onert/backend/acl_common/AclTensorBuilder.h
index 6b03fdf7f..91452014b 100644
--- a/runtime/onert/backend/acl_common/AclTensorBuilder.h
+++ b/runtime/onert/backend/acl_common/AclTensorBuilder.h
@@ -25,6 +25,7 @@
#include "ir/OperandIndexMap.h"
#include <ir/Operands.h>
#include "AclTensorManager.h"
+#include "AclTensorRegistry.h"
#include <memory>
#include "ParentInfo.h"
#include <util/Utils.h>
@@ -48,7 +49,8 @@ class AclTensorBuilder : public ITensorBuilder
public:
using T_AclTensorManager = AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>;
- AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr);
+ AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr,
+ const std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> &tensor_reg);
/**
* @brief Register tensor information to allocate on ACL-CL backend
@@ -63,19 +65,13 @@ public:
void notifyLastUse(const ir::OperandIndex &) override;
bool isRegistered(const ir::OperandIndex &) const override;
- std::shared_ptr<backend::ITensorRegistry> tensorRegistry() override { return nullptr; }
void prepare(void) override;
void allocate() override;
void postFunctionPrepare() override;
- std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) override;
- void iterate(const IterateFunction &fn) override;
-
std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) override;
- std::shared_ptr<T_ITensor> at(const ir::OperandIndex &ind);
-
T_AclTensorManager *acl_tensor_manager(void) { return _tensor_mgr.get(); }
void setUsesCount(const ir::OperandIndex &index, size_t num_uses)
@@ -100,8 +96,6 @@ public:
*/
bool isSubTensorOf(const ir::OperandIndex &parent, const ir::OperandIndex &child);
- bool supportDynamicTensor() override { return false; }
-
private:
void buildTensors(void);
ir::OperandIndex findRootParent(ir::OperandIndex index);
@@ -113,6 +107,7 @@ private:
ir::OperandIndexMap<size_t> _uses_count_map;
std::unique_ptr<T_AclTensorManager> _tensor_mgr;
+ std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> _tensor_reg;
// for linear executor
std::vector<std::pair<UsesType, ir::OperandIndex>> _lifetime_seq;
@@ -140,9 +135,10 @@ namespace acl_common
{
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder(const ir::Operands &operands,
- T_AclTensorManager *tensor_mgr)
- : _operands{operands}, _tensor_mgr{tensor_mgr}
+AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder(
+ const ir::Operands &operands, T_AclTensorManager *tensor_mgr,
+ const std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> &tensor_reg)
+ : _operands{operands}, _tensor_mgr{tensor_mgr}, _tensor_reg{tensor_reg}
{
assert(_tensor_mgr);
}
@@ -310,28 +306,6 @@ void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::postFunctionPrepare(voi
}
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-std::shared_ptr<ITensor>
-AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::tensorAt(const ir::OperandIndex &ind)
-{
- return _tensor_mgr->at(ind);
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::iterate(const IterateFunction &fn)
-{
- _tensor_mgr->iterate(fn);
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-std::shared_ptr<T_ITensor>
-AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::at(const ir::OperandIndex &ind)
-{
- auto ret = _tensor_mgr->at(ind);
- assert(ret != nullptr);
- return ret;
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
std::unique_ptr<ITensorManager>
AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::releaseStaticTensorManager(void)
{
diff --git a/runtime/onert/backend/acl_common/AclTensorRegistry.h b/runtime/onert/backend/acl_common/AclTensorRegistry.h
new file mode 100644
index 000000000..1ef9f4b35
--- /dev/null
+++ b/runtime/onert/backend/acl_common/AclTensorRegistry.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_COMMON_ACL_TENSOR_REGISTRY_H__
+#define __ONERT_BACKEND_ACL_COMMON_ACL_TENSOR_REGISTRY_H__
+
+#include "backend/ITensorRegistry.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+/**
+ * @brief Tensor registry class for acl backends
+ *
+ * This is implemented as a wrapper of AclTensorManager.
+ */
+template <typename T_AclTensorManager> class AclTensorRegistry : public ITensorRegistry
+{
+public:
+ AclTensorRegistry(T_AclTensorManager *tensor_mgr) : _tensor_mgr{tensor_mgr} {}
+
+ std::shared_ptr<ITensor> getITensor(const ir::OperandIndex &ind) override
+ {
+ return _tensor_mgr->at(ind);
+ }
+
+ std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &ind) override
+ {
+ return getITensor(ind);
+ }
+
+ auto getAclTensor(const ir::OperandIndex &ind) { return _tensor_mgr->at(ind); }
+
+private:
+ T_AclTensorManager *_tensor_mgr;
+};
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_COMMON_ACL_TENSOR_REGISTRY_H__
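For context, a short sketch of how the new registry is wired and used, mirroring the Backend.h and KernelGenerator changes elsewhere in this patch (illustrative fragment, not part of the diff):

// The backend creates one tensor manager, wraps it in a registry, and hands both
// to the tensor builder; kernel generators then resolve tensors via the registry.
auto tm = createTensorManager(is_linear_executor);
auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
auto tb = std::make_shared<TensorBuilder>(operands, tm, tr);

// Lookup inside a kernel generator visitor (replaces the old tensor_builder->at(...)):
auto ofm_tensor = tr->getAclTensor(ofm_index).get();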
diff --git a/runtime/onert/backend/acl_common/Convert.cc b/runtime/onert/backend/acl_common/Convert.cc
index a5bbe1691..67dcc8192 100644
--- a/runtime/onert/backend/acl_common/Convert.cc
+++ b/runtime/onert/backend/acl_common/Convert.cc
@@ -18,6 +18,7 @@
#include "Swizzle.h"
#include "ir/DataType.h"
+#include "ir/operation/ElementwiseActivation.h"
#include <memory>
namespace
@@ -177,6 +178,50 @@ namespace acl_common
}
}
+::arm_compute::ActivationLayerInfo
+asActivationLayerInfo(const ir::operation::ElementwiseActivation::Type op_type, float alpha,
+ float beta)
+{
+ switch (op_type)
+ {
+ case ir::operation::ElementwiseActivation::Type::RELU:
+ if (beta == 0.f)
+ {
+ if (alpha == ir::operation::ElementwiseActivation::infinity)
+ {
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
+ }
+ else
+ {
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, alpha};
+ }
+ }
+ else
+ {
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, alpha, beta};
+ }
+ case ir::operation::ElementwiseActivation::Type::TANH:
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, alpha, beta};
+ case ir::operation::ElementwiseActivation::Type::LOGISTIC:
+ // NOTE The sigmoid function is a special case of the Logistic function when L=1, k=1, x0=0.
+ // TODO In the ACL and NNAPI specs, currently, Logistic's L is always 1, k is always 1, and
+ // x0 is always 0 (always sigmoid) regardless of the parameter values.
+ // If ACL supports a non-sigmoid logistic, these param values should be fixed.
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
+ case ir::operation::ElementwiseActivation::Type::LEAKY_RELU:
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LEAKY_RELU, alpha};
+ default:
+ throw std::runtime_error{"Not supported, yet"};
+ break;
+ }
+}
+
arm_compute::Coordinates asCoordinates(const ir::Operand &operand, int32_t rank,
ir::Layout frontend_layout, ir::Layout backend_layout)
{
@@ -223,11 +268,6 @@ std::unique_ptr<AclFunction> asAclFunction(std::unique_ptr<::arm_compute::IFunct
return std::make_unique<AclFunction>(std::move(layer));
}
-std::unique_ptr<AclClFunction> asAclClFunction(std::unique_ptr<::arm_compute::IFunction> &&layer)
-{
- return std::make_unique<AclClFunction>(std::move(layer));
-}
-
ir::Layout asRuntimeLayout(::arm_compute::DataLayout data_layout)
{
switch (data_layout)
@@ -265,6 +305,21 @@ ir::DataType asRuntimeDataType(::arm_compute::DataType data_type)
}
}
+arm_compute::PoolingType convertPoolType(ir::operation::Pool2D::PoolType pool_type_ir)
+{
+ switch (pool_type_ir)
+ {
+ case ir::operation::Pool2D::PoolType::AVG:
+ return arm_compute::PoolingType::AVG;
+ case ir::operation::Pool2D::PoolType::L2:
+ return arm_compute::PoolingType::L2;
+ case ir::operation::Pool2D::PoolType::MAX:
+ return arm_compute::PoolingType::MAX;
+ default:
+ throw std::runtime_error("convertPoolType: Not supported operation yet");
+ }
+}
+
arm_compute::ReduceOperation convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir)
{
switch (reduce_type_ir)
diff --git a/runtime/onert/backend/acl_common/Convert.h b/runtime/onert/backend/acl_common/Convert.h
index 9362098a5..380321c07 100644
--- a/runtime/onert/backend/acl_common/Convert.h
+++ b/runtime/onert/backend/acl_common/Convert.h
@@ -25,7 +25,9 @@
#include "ir/Layout.h"
#include "ir/InternalType.h"
#include "ir/Operand.h"
+#include "ir/operation/Pool2D.h"
#include "ir/operation/Reduce.h"
+#include "ir/operation/ElementwiseActivation.h"
#include "ir/Shape.h"
#include "ir/TypeInfo.h"
#include "ir/Coordinates.h"
@@ -59,6 +61,9 @@ namespace acl_common
const ir::Stride &stride);
::arm_compute::ActivationLayerInfo asActivationLayerInfo(ir::Activation act_code);
+::arm_compute::ActivationLayerInfo
+asActivationLayerInfo(const ir::operation::ElementwiseActivation::Type op_type, float alpha,
+ float beta);
arm_compute::Coordinates asCoordinates(const ir::Operand &operand, int32_t rank,
ir::Layout frontend_layout, ir::Layout backend_layout);
@@ -67,7 +72,6 @@ std::set<uint32_t> asSet(const ir::Operand &operand, int32_t rank, ir::Layout fr
ir::Layout backend_layout);
std::unique_ptr<AclFunction> asAclFunction(std::unique_ptr<::arm_compute::IFunction> &&layer);
-std::unique_ptr<AclClFunction> asAclClFunction(std::unique_ptr<::arm_compute::IFunction> &&layer);
template <typename T_Function>
std::unique_ptr<T_Function> asFunction(std::unique_ptr<::arm_compute::IFunction> &&fn)
@@ -78,6 +82,7 @@ std::unique_ptr<T_Function> asFunction(std::unique_ptr<::arm_compute::IFunction>
ir::Layout asRuntimeLayout(::arm_compute::DataLayout data_layout);
ir::DataType asRuntimeDataType(::arm_compute::DataType data_type);
+arm_compute::PoolingType convertPoolType(ir::operation::Pool2D::PoolType pool_type_ir);
arm_compute::ReduceOperation convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir);
} // namespace acl_common
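A small worked example of the new activation conversion (a sketch only; it assumes the frontend lowers a former ReLU6 node to ElementwiseActivation with op_type RELU, alpha 6 and beta 0):

// With beta == 0 and alpha != infinity, asActivationLayerInfo() returns
// ActivationFunction::BOUNDED_RELU with an upper bound of alpha (6 here).
const auto act_info = acl_common::asActivationLayerInfo(
    ir::operation::ElementwiseActivation::Type::RELU, 6.0f, 0.0f);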
diff --git a/runtime/onert/backend/acl_neon/Backend.h b/runtime/onert/backend/acl_neon/Backend.h
index a0b145e19..35d6e4e8e 100644
--- a/runtime/onert/backend/acl_neon/Backend.h
+++ b/runtime/onert/backend/acl_neon/Backend.h
@@ -48,10 +48,13 @@ public:
const auto &operands = graph.operands();
const auto &operations = graph.operations();
auto context = std::make_unique<BackendContext>(this, &graph);
- auto tb = std::make_shared<TensorBuilder>(operands, createTensorManager(is_linear_executor));
+ auto tm = createTensorManager(is_linear_executor);
+ auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
+ auto tb = std::make_shared<TensorBuilder>(operands, tm, tr);
+ context->tensor_registry = tr;
context->tensor_builder = tb;
- context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb);
- context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb);
+ context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
+ context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr);
context->tensor_register = nullptr;
context->optimizer = std::make_shared<Optimizer>(context.get());
return context;
diff --git a/runtime/onert/backend/acl_neon/ConstantInitializer.cc b/runtime/onert/backend/acl_neon/ConstantInitializer.cc
index 4191b277f..79edb9ded 100644
--- a/runtime/onert/backend/acl_neon/ConstantInitializer.cc
+++ b/runtime/onert/backend/acl_neon/ConstantInitializer.cc
@@ -24,100 +24,12 @@ namespace acl_neon
{
ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
- : IConstantInitializer{operands}, _tensor_builder{tensor_builder}
+ const std::shared_ptr<ITensorRegistry> &tensor_reg)
+ : acl_common::AclConstantInitializer{operands, tensor_reg}
{
// DO NOTHING
}
-void ConstantInitializer::copyInputInitialize(const ir::Operation &node, uint32_t index)
-{
- assert(node.getInputs().size() > index);
-
- const auto &input_index = node.getInputs().at(index);
- const auto &input_obj = _operands.at(input_index);
- registerCopyInitializer(input_index, input_obj);
-}
-
-void ConstantInitializer::permuteInputInitialize(const ir::Operation &node, uint32_t index)
-{
- assert(node.getInputs().size() > index);
-
- const auto &input_index = node.getInputs().at(index);
- const auto &input_obj = _operands.at(input_index);
- registerPermuteInitializer(input_index, input_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::BatchToSpaceND &node)
-{
- const auto &block_size_index = node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE);
- const auto &block_size_obj = _operands.at(block_size_index);
-
- if (block_size_obj.isConstant())
- {
- _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) {
- assert(model_obj.data());
- const auto &shape = model_obj.shape();
- const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base());
- assert(model_obj.shape().rank() == 1);
- obj.access([&](ITensor &tensor) {
- for (size_t i = 0; i < shape.num_elements(); ++i)
- {
- const int32_t value = base[shape.num_elements() - i - 1];
- int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() +
- tensor.calcOffset({static_cast<int32_t>(i)}));
- *into = value;
- }
- });
- };
- }
-}
-
-void ConstantInitializer::visit(const ir::operation::Conv2D &node)
-{
- permuteInputInitialize(node, ir::operation::Conv2D::KERNEL);
- copyInputInitialize(node, ir::operation::Conv2D::BIAS);
-}
-
-void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node)
-{
- permuteInputInitialize(node, ir::operation::DepthwiseConv2D::KERNEL);
- copyInputInitialize(node, ir::operation::DepthwiseConv2D::BIAS);
-}
-
-void ConstantInitializer::visit(const ir::operation::FullyConnected &node)
-{
- copyInputInitialize(node, ir::operation::FullyConnected::WEIGHT);
- copyInputInitialize(node, ir::operation::FullyConnected::BIAS);
-}
-
-void ConstantInitializer::visit(const ir::operation::LSTM &node)
-{
- copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::INPUT_GATE_BIAS);
- copyInputInitialize(node, ir::operation::LSTM::FORGET_GATE_BIAS);
- copyInputInitialize(node, ir::operation::LSTM::OUTPUT_GATE_BIAS);
- copyInputInitialize(node, ir::operation::LSTM::PROJECTION_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::PROJECTION_BIAS);
-}
-
-void ConstantInitializer::visit(const ir::operation::RNN &node)
-{
- copyInputInitialize(node, ir::operation::RNN::WEIGHTS);
- copyInputInitialize(node, ir::operation::RNN::RECURRENT_WEIGHTS);
- copyInputInitialize(node, ir::operation::RNN::BIAS);
-}
-
void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
{
const auto &block_size_index = node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE);
@@ -173,11 +85,6 @@ void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
}
}
-void ConstantInitializer::visit(const ir::operation::TransposeConv &node)
-{
- permuteInputInitialize(node, ir::operation::TransposeConv::KERNEL);
-}
-
} // namespace acl_neon
} // namespace backend
} // namespace onert
diff --git a/runtime/onert/backend/acl_neon/ConstantInitializer.h b/runtime/onert/backend/acl_neon/ConstantInitializer.h
index 6b4c1f145..c7d71cdcf 100644
--- a/runtime/onert/backend/acl_neon/ConstantInitializer.h
+++ b/runtime/onert/backend/acl_neon/ConstantInitializer.h
@@ -17,9 +17,7 @@
#ifndef __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__
#define __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__
-#include <backend/IConstantInitializer.h>
-#include <ir/Operands.h>
-#include "TensorBuilder.h"
+#include "AclConstantInitializer.h"
namespace onert
{
@@ -28,29 +26,15 @@ namespace backend
namespace acl_neon
{
-class ConstantInitializer : public IConstantInitializer
+class ConstantInitializer : public acl_common::AclConstantInitializer
{
public:
ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder);
+ const std::shared_ptr<ITensorRegistry> &tensor_reg);
public:
- void visit(const ir::operation::BatchToSpaceND &) override;
- void visit(const ir::operation::Conv2D &) override;
- void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::FullyConnected &) override;
- void visit(const ir::operation::LSTM &) override;
- void visit(const ir::operation::RNN &) override;
- void visit(const ir::operation::SpaceToBatchND &) override;
- void visit(const ir::operation::TransposeConv &) override;
-
-private:
- std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
- void copyInputInitialize(const ir::Operation &node, uint32_t index);
- void permuteInputInitialize(const ir::Operation &node, uint32_t index);
-
-private:
- std::shared_ptr<TensorBuilder> _tensor_builder;
+ using acl_common::AclConstantInitializer::visit;
+ void visit(const ir::operation::SpaceToBatchND &node) final;
};
} // namespace acl_neon
diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.cc b/runtime/onert/backend/acl_neon/KernelGenerator.cc
index 1195b83cc..6d53c1245 100644
--- a/runtime/onert/backend/acl_neon/KernelGenerator.cc
+++ b/runtime/onert/backend/acl_neon/KernelGenerator.cc
@@ -44,11 +44,12 @@ using ::onert::backend::acl_common::asAclFunction;
using ActivationBuilder = ::onert::backend::acl_common::AclActivationBuilder<
::arm_compute::ITensor, ::arm_compute::NEActivationLayer, acl_common::AclFunction>;
-KernelGenerator::KernelGenerator(const ir::Operands &operands_ctx,
- const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
+KernelGenerator::KernelGenerator(
+ const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
+ const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
: _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder),
- _current_op_seq_layout(ir::Layout::UNKNOWN)
+ _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN)
{
// DO NOTHING
}
@@ -70,26 +71,6 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
}
}
-void KernelGenerator::visit(const ir::operation::Abs &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
-
- auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
void KernelGenerator::visit(const ir::operation::ArgMax &node)
{
const auto ofm_index{node.getOutputs().at(0)};
@@ -97,8 +78,8 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
auto frontend_layout = _current_op_seq_layout;
auto backend_layout = ifm_tensor->layout();
@@ -111,14 +92,11 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
const auto fixed_axis =
acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
- auto fn = std::make_unique<::arm_compute::NEArgMinMaxLayer>();
-
- fn->configure(ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(),
- arm_compute::ReductionOperation::ARG_IDX_MAX);
+ auto fn = acl_common::generateLayer<arm_compute::NEArgMinMaxLayer>(
+ ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(),
+ arm_compute::ReductionOperation::ARG_IDX_MAX);
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
@@ -128,50 +106,67 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
const auto block_size_index{
node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto block_size_tensor = _tensor_builder->at(block_size_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
assert(_ctx.at(block_size_index).data());
- auto fn = std::make_unique<::arm_compute::NEBatchToSpaceLayer>();
-
- fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
+ auto fn = acl_common::generateLayer<arm_compute::NEBatchToSpaceLayer>(
+ ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::Cast &node)
+void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
+ const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- std::unique_ptr<::arm_compute::IFunction> fn;
- if (ifm_tensor->data_type() == ofm_tensor->data_type())
- {
- auto l = std::make_unique<::arm_compute::NECopy>();
+ const auto activation = node.param().activation;
- l->configure(ifm_tensor->handle(), ofm_tensor->handle());
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
- fn = std::move(l);
- }
- else
+ std::unique_ptr<arm_compute::IFunction> fn;
+ switch (node.param().arithmetic_type)
{
- auto l = std::make_unique<::arm_compute::NECast>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
-
- fn = std::move(l);
+ case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEArithmeticAddition>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+ arm_compute::ConvertPolicy::SATURATE);
+ break;
+ }
+ case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEArithmeticSubtraction>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+ arm_compute::ConvertPolicy::SATURATE);
+ break;
+ }
+ case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
+ {
+ // For scale 1.0, the only allowed RoundingPolicy is RoundingPolicy::TO_ZERO
+ fn = acl_common::generateLayer<arm_compute::NEPixelWiseMultiplication>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
+ arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO);
+ break;
+ }
+ case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEElementwiseDivision>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+ break;
+ }
+ default:
+ assert(false && "The BinaryArithmetic operation supports only binary arithmetic operations");
+ break;
}
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = std::make_unique<exec::FunctionSequence>(
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Conv2D &node)
@@ -195,20 +190,18 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
ker_width, ker_height);
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto ker_tensor = _tensor_builder->at(ker_index).get();
- auto bias_tensor = _tensor_builder->at(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
- auto fn = std::make_unique<::arm_compute::NEConvolutionLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
- ofm_tensor->handle(), conv_info, ::arm_compute::WeightsInfo(),
- ::arm_compute::Size2D(1U, 1U), act_info);
+ auto fn = acl_common::generateLayer<arm_compute::NEConvolutionLayer>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
+ ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info,
+ ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
_return_fn = asAclFunction(std::move(fn));
}
@@ -221,16 +214,13 @@ void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
auto block_size = node.param().block_size;
assert(block_size > 0);
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
- auto fn = std::make_unique<::arm_compute::NEDepthToSpaceLayer>();
+ auto fn = acl_common::generateLayer<arm_compute::NEDepthToSpaceLayer>(
+ input_tensor->handle(), output_tensor->handle(), block_size);
- fn->configure(input_tensor->handle(), output_tensor->handle(), block_size);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
@@ -255,67 +245,23 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
const auto multiplier = node.param().multiplier;
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto ker_tensor = _tensor_builder->at(ker_index).get();
- auto bias_tensor = _tensor_builder->at(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
{
- auto fn = std::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>();
-
- fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
- ofm_tensor->handle(), conv_info, multiplier, act_info);
+ auto fn = acl_common::generateLayer<arm_compute::NEDepthwiseConvolutionLayer>(
+ ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(),
+ conv_info, multiplier, act_info);
_return_fn = asAclFunction(std::move(fn));
}
}
-void KernelGenerator::visit(const ir::operation::Dequantize &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEDequantizationLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
-{
- auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
- node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::MAX);
-
- const auto ofm_index{node.getOutputs().at(0)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- const auto activation = node.param().activation;
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(raw_fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
-{
- auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
- node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::AVG);
-
- const auto ofm_index{node.getOutputs().at(0)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- const auto activation = node.param().activation;
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(raw_fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
void KernelGenerator::visit(const ir::operation::Concat &node)
{
const auto ofm_index{node.getOutputs().at(0)};
@@ -336,80 +282,223 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
return;
}
- auto output_tensor = _tensor_builder->at(ofm_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(ofm_index).get();
std::vector<::arm_compute::ITensor *> input_tensors;
for (const auto &ifm_ind : input_indexes)
- input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle());
+ input_tensors.emplace_back(_tensor_reg->getAclTensor(ifm_ind)->handle());
std::unique_ptr<::arm_compute::IFunction> fn;
if (input_indexes.size() < 2)
{
- auto l = std::make_unique<::arm_compute::NECopy>();
- l->configure(input_tensors.at(0), output_tensor->handle());
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NECopy>(input_tensors.at(0),
+ output_tensor->handle());
}
else
{
- auto l = std::make_unique<::arm_compute::NEConcatenateLayer>();
const auto rank = _ctx.at(ofm_index).shape().rank();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = output_tensor->layout();
const auto fixed_axis =
acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
- l->configure(input_tensors, output_tensor->handle(), fixed_axis);
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NEConcatenateLayer>(
+ input_tensors, output_tensor->handle(), fixed_axis);
}
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
{
- const auto output_index{node.getOutputs().at(0)};
- const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
- const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
+
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+
+ const ::arm_compute::ActivationLayerInfo act_info = acl_common::asActivationLayerInfo(
+ node.param().op_type, node.param().alpha, node.param().beta);
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto lookups_tensor = _tensor_builder->at(lookups_index).get();
- auto values_tensor = _tensor_builder->at(values_index).get();
+ std::unique_ptr<arm_compute::IFunction> fn;
+ if (node.param().op_type == ir::operation::ElementwiseActivation::Type::LOGISTIC)
+ {
+    // NOTE NEActivationLayer can produce erroneous results; this is caused by
+    // 'vexpq_f32()'.
+    // The NEON function returns 'NaN' instead of 'INF' for values outside the representable
+    // float range, and the 'NaN' then propagates into the result of this op.
+ fn = acl_common::generateLayer<arm_compute::NEActivationLayerEx>(
+ ifm_tensor->handle(), ofm_tensor->handle(), act_info);
+ }
+ else
+ {
+ fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(ifm_tensor->handle(),
+ ofm_tensor->handle(), act_info);
+ }
- auto fn = std::make_unique<::arm_compute::NEEmbeddingLookup>();
+ _return_fn = asAclFunction(std::move(fn));
+}
- fn->configure(values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
+void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
- auto acl_fn = asAclFunction(std::move(fn));
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
- _return_fn = std::move(acl_fn);
+ std::unique_ptr<arm_compute::IFunction> fn;
+ switch (node.param().op_type)
+ {
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
+ {
+ fn = acl_common::generateLayer<arm_compute::NELogicalAnd>(
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
+ {
+ fn = acl_common::generateLayer<arm_compute::NELogicalOr>(
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEElementwiseMax>(
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEElementwiseMin>(
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ break;
+ }
+ default:
+ {
+ std::string err_msg("acl_neon KernelGenerator : " + node.name() +
+ "is not elementwise-binary operations");
+ assert(false && err_msg.c_str());
+ break;
+ }
+ }
+ _return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::Floor &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)};
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
+
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+
+ std::unique_ptr<arm_compute::IFunction> fn;
+ switch (node.param().op_type)
+ {
+ case ir::operation::ElementwiseUnary::Type::ABS:
+ {
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(
+ input_tensor->handle(), output_tensor->handle(), act_info);
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::CAST:
+ {
+ if (input_tensor->data_type() == output_tensor->data_type())
+ {
+ fn = acl_common::generateLayer<arm_compute::NECopy>(input_tensor->handle(),
+ output_tensor->handle());
+ }
+ else
+ {
+ fn = acl_common::generateLayer<arm_compute::NECast>(
+ input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
+ }
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::DEQUANTIZE:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEDequantizationLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::EXP:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEExpLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::FLOOR:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEFloor>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEBitwiseNot>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::NEG:
+ {
+ fn = acl_common::generateLayer<arm_compute::NENegLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::RSQRT:
+ {
+ fn = acl_common::generateLayer<arm_compute::NERsqrtLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::SQRT:
+ {
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
- auto fn = std::make_unique<::arm_compute::NEFloor>();
+ fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(
+ input_tensor->handle(), output_tensor->handle(), act_info);
+ break;
+ }
+ default:
+ {
+ throw std::runtime_error("acl_neon KernelGenerator : " + node.name() +
+ "is not supported yet");
+ break;
+ }
+ }
+ _return_fn = asAclFunction(std::move(fn));
+}
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
+void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
+ const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
- auto acl_fn = asAclFunction(std::move(fn));
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
+ auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
- _return_fn = std::move(acl_fn);
+ auto fn = acl_common::generateLayer<arm_compute::NEEmbeddingLookup>(
+ values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
+
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::FullyConnected &node)
{
const auto output_index{node.getOutputs().at(0)};
- auto output_tensor = _tensor_builder->at(output_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
const auto activation = node.param().activation;
auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ITensor,
::arm_compute::NEFullyConnectedReshapingLayer>(
- node, _ctx, _tensor_builder, _current_op_seq_layout);
+ node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout);
_return_fn = std::make_unique<exec::FunctionSequence>(
std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
}
@@ -423,21 +512,18 @@ void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto hits_tensor = _tensor_builder->at(hits_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto hits_tensor = _tensor_reg->getAclTensor(hits_index).get();
- auto lookups_tensor = _tensor_builder->at(lookups_index).get();
- auto keys_tensor = _tensor_builder->at(keys_index).get();
- auto values_tensor = _tensor_builder->at(values_index).get();
+ auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
+ auto keys_tensor = _tensor_reg->getAclTensor(keys_index).get();
+ auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
- auto fn = std::make_unique<::arm_compute::NEHashtableLookup>();
+ auto fn = acl_common::generateLayer<arm_compute::NEHashtableLookup>(
+ lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
+ output_tensor->handle(), hits_tensor->handle());
- fn->configure(lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
- output_tensor->handle(), hits_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Gather &node)
@@ -453,9 +539,9 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
// Converting in reverse order
const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto indices_tensor = _tensor_builder->at(indices_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto indices_tensor = _tensor_reg->getAclTensor(indices_index).get();
const auto backend_layout = ofm_tensor->layout();
UNUSED_RELEASE(backend_layout);
@@ -471,8 +557,6 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
assert(backend_layout == indices_tensor->layout());
assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout);
- auto fn = std::make_unique<::arm_compute::NEGatherEx>();
-
// input is n-D, indices k-D, output is (n + k - 1)-D
size_t n = ifm_rank;
assert(n == ifm_tensor->num_dimensions());
@@ -495,15 +579,14 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
acl_common::asTensorShape(indices.shape(), _current_op_seq_layout, backend_layout, false));
}
- fn->configure(ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
+ auto fn = acl_common::generateLayer<arm_compute::NEGatherEx>(
+ ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
  // acl_neon doesn't revert the disabled dim_correction because acl_neon's kernels would
  // use arm_compute::TensorInfo::offset_element_in_bytes()
  // That would cause an error when a kernel accesses a higher dimension whose value is 1
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
@@ -513,17 +596,16 @@ void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto gamma_tensor = _tensor_builder->at(gamma_index).get();
- auto beta_tensor = _tensor_builder->at(beta_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto gamma_tensor = _tensor_reg->getAclTensor(gamma_index).get();
+ auto beta_tensor = _tensor_reg->getAclTensor(beta_index).get();
auto epsilon = node.param().epsilon;
auto activation = node.param().activation;
- auto fn = std::make_unique<::arm_compute::NEInstanceNormalizationLayerEx>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(),
- beta_tensor->handle(), epsilon);
+ auto fn = acl_common::generateLayer<arm_compute::NEInstanceNormalizationLayerEx>(
+ ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), beta_tensor->handle(),
+ epsilon);
_return_fn = std::make_unique<exec::FunctionSequence>(
asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
@@ -548,32 +630,16 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node)
float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
float bias = 0.0f; // Don't offset the reduction.
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
radius, alpha, beta, bias, false);
- auto fn = std::make_unique<::arm_compute::NENormalizationLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::NENormalizationLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::L2Pool2D &node)
-{
- auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
- node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::L2);
-
- const auto ofm_index{node.getOutputs().at(0)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- const auto activation = node.param().activation;
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(raw_fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node)
@@ -587,142 +653,22 @@ void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &nod
auto beta = node.param().beta;
auto bias = node.param().bias;
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
const auto norm_info = ::arm_compute::NormalizationLayerInfo(
::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
- auto fn = std::make_unique<::arm_compute::NENormalizationLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::NENormalizationLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalAnd &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)};
- const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input0_tensor = _tensor_builder->at(input0_index).get();
- auto input1_tensor = _tensor_builder->at(input1_index).get();
-
- auto fn = std::make_unique<::arm_compute::NELogicalAnd>();
-
- fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalNot &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEBitwiseNot>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalOr &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)};
- const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input0_tensor = _tensor_builder->at(input0_index).get();
- auto input1_tensor = _tensor_builder->at(input1_index).get();
-
- auto fn = std::make_unique<::arm_compute::NELogicalOr>();
-
- fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Logistic &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
-
- // NOTE NEActivationLayer can generate produce erroneous results. it were caused by 'vexpq_f32()'.
- // The neon function returns a value outside of the limit of representation in float as 'NaN'
- // instead of 'INF', and then the result of this op will be errors due to the 'NaN'.
- auto fn = std::make_unique<::arm_compute::NEActivationLayerEx>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::LSTM &node)
{
_return_fn = acl_common::kernelGenLSTM<acl_common::AclFunction, ::arm_compute::ITensor,
- ::arm_compute::NELSTMLayer>(node, _ctx, _tensor_builder);
-}
-
-void KernelGenerator::visit(const ir::operation::Mul &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEPixelWiseMultiplication>();
-
- // RoundingPolicy for scale:1.0 is only allowed RoundingPolicy::TO_ZERO
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
- arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO);
-
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::Neg &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::NENegLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ ::arm_compute::NELSTMLayer>(node, _ctx, _tensor_reg);
}
void KernelGenerator::visit(const ir::operation::Pack &node)
@@ -736,25 +682,23 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
for (const auto &input_index : node.getInputs())
input_indexes.emplace_back(input_index);
- auto output = _tensor_builder->at(output_index).get()->handle();
+ auto output = _tensor_reg->getAclTensor(output_index).get()->handle();
std::vector<arm_compute::ITensor *> inputs;
for (const auto &input_index : input_indexes)
- inputs.emplace_back(_tensor_builder->at(input_index)->handle());
+ inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle());
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_builder->at(output_index).get()->layout();
+ const auto backend_layout = _tensor_reg->getAclTensor(output_index).get()->layout();
if (axis < 0)
axis += output_rank;
axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();
- auto fn = std::make_unique<::arm_compute::NEStackLayer>();
-
// Disable applied dim_correction
for (const auto &input_index : input_indexes)
{
size_t input_rank = _ctx.at(input_index).shape().rank();
- const auto &input_tensor = _tensor_builder->at(input_index);
+ const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
assert(input_rank == input_tensor->num_dimensions());
if (input_rank != input_tensor->info()->num_dimensions())
{
@@ -764,7 +708,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
}
}
- fn->configure(inputs, axis, output);
+ auto fn = acl_common::generateLayer<arm_compute::NEStackLayer>(inputs, axis, output);
  // acl_neon doesn't revert the disabled dim_correction because acl_neon's kernels would
// use arm_compute::TensorInfo::offset_element_in_bytes()
@@ -783,8 +727,8 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
auto rank = _ctx.at(input_index).shape().rank();
auto pad_base = _ctx.at(pad_index).data()->base();
- auto input = _tensor_builder->at(input_index).get()->handle();
- auto output = _tensor_builder->at(output_index).get()->handle();
+ auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
+ auto output = _tensor_reg->getAclTensor(output_index).get()->handle();
::arm_compute::PaddingList padding_list;
padding_list.resize(rank);
@@ -793,7 +737,7 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2);
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
+ const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
const auto axis =
acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
@@ -807,19 +751,33 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
const auto pixel_value =
::arm_compute::PixelValue(0, input->info()->data_type(), input->info()->quantization_info());
- auto fn = std::make_unique<::arm_compute::NEPadLayer>();
- fn->configure(input, output, padding_list, pixel_value);
+ auto fn =
+ acl_common::generateLayer<arm_compute::NEPadLayer>(input, output, padding_list, pixel_value);
_return_fn = asAclFunction(std::move(fn));
}
+void KernelGenerator::visit(const ir::operation::Pool2D &node)
+{
+ auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
+ node, _ctx, _tensor_reg, _current_op_seq_layout,
+ acl_common::convertPoolType(node.param().op_type));
+
+ const auto ofm_index{node.getOutputs().at(0)};
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ const auto activation = node.param().activation;
+ _return_fn = std::make_unique<exec::FunctionSequence>(
+ asAclFunction(std::move(raw_fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
+}
+
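// The separate MaxPool2D / AvgPool2D / L2Pool2D visits removed earlier are now served by this
// single Pool2D visit; acl_common::convertPoolType presumably just maps the IR pooling type
// onto arm_compute::PoolingType. A minimal sketch under that assumption:
::arm_compute::PoolingType convertPoolType(ir::operation::Pool2D::PoolType pool_type)
{
  switch (pool_type)
  {
    case ir::operation::Pool2D::PoolType::AVG:
      return ::arm_compute::PoolingType::AVG;
    case ir::operation::Pool2D::PoolType::MAX:
      return ::arm_compute::PoolingType::MAX;
    case ir::operation::Pool2D::PoolType::L2:
      return ::arm_compute::PoolingType::L2;
    default:
      throw std::runtime_error("convertPoolType: unsupported pooling type");
  }
}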
void KernelGenerator::visit(const ir::operation::Permute &node)
{
const auto ofm_idx{node.getOutputs().at(0)};
const auto ifm_idx{node.getInputs().at(0)};
const auto permute_type = node.getPermuteType();
- auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
- auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
const auto rank = _ctx.at(ofm_idx).shape().rank();
assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank());
@@ -830,35 +788,22 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
// WHCN -> CWHN
pv = arm_compute::PermutationVector{2, 0, 1};
- auto l = std::make_unique<::arm_compute::NEPermute>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NEPermute>(ifm_tensor->handle(),
+ ofm_tensor->handle(), pv);
}
else if (permute_type == ir::operation::Permute::Type::NHWC_TO_NCHW && rank == 4)
{
// CWHN -> WHCN
pv = arm_compute::PermutationVector{1, 2, 0};
- auto l = std::make_unique<::arm_compute::NEPermute>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NEPermute>(ifm_tensor->handle(),
+ ofm_tensor->handle(), pv);
}
else
{
- auto l = std::make_unique<::arm_compute::NECopy>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NECopy>(ifm_tensor->handle(), ofm_tensor->handle());
}
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::PReLU &node)
@@ -867,21 +812,14 @@ void KernelGenerator::visit(const ir::operation::PReLU &node)
const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto alpha_tensor = _tensor_builder->at(alpha_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index).get();
- std::unique_ptr<::arm_compute::IFunction> fn;
-
- auto l = std::make_unique<::arm_compute::NEPReluLayer>();
-
- l->configure(ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
-
- fn = std::move(l);
+ auto fn = acl_common::generateLayer<arm_compute::NEPReluLayer>(
+ ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Reduce &node)
@@ -890,8 +828,8 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
// Convert to ACL axes taking into account negative values and possible duplicates.
const auto &axes = _ctx.at(axes_index);
@@ -906,93 +844,21 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
std::unique_ptr<::arm_compute::IFunction> fn;
if (reduce_type == ir::operation::Reduce::ReduceType::MEAN)
{
- auto l = std::make_unique<::arm_compute::NEReduceMean>();
-
- l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle());
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NEReduceMean>(input_tensor->handle(), reduce_axes,
+ keep_dims, output_tensor->handle());
}
else if (reduce_type == ir::operation::Reduce::ReduceType::SUM)
{
- auto l = std::make_unique<::arm_compute::NEReduceSum>();
-
- l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle());
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NEReduceSum>(input_tensor->handle(), reduce_axes,
+ keep_dims, output_tensor->handle());
}
else
{
- auto l = std::make_unique<::arm_compute::NEReduceOperation>();
-
- l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle(),
- acl_common::convertReduceType(reduce_type));
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NEReduceOperation>(
+ input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle(),
+ acl_common::convertReduceType(reduce_type));
}
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<arm_compute::NEActivationLayer>();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU1 &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
-
- auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU6 &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f};
-
- auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Reshape &node)
@@ -1000,8 +866,8 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
  // NOTE This operation must not change the layout from frontend to backend
// So, PermutationOperationPass makes layouts of frontend and backend the same.
@@ -1012,13 +878,10 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
UNUSED_RELEASE(frontend_layout);
UNUSED_RELEASE(backend_layout);
- auto fn = std::make_unique<arm_compute::NEReshapeLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle());
+ auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
+ output_tensor->handle());
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
@@ -1027,18 +890,15 @@ void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEScale>();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(),
- ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
- ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
+ auto fn = acl_common::generateLayer<arm_compute::NEScale>(
+ ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::InterpolationPolicy::BILINEAR,
+ ::arm_compute::BorderMode::REPLICATE, ::arm_compute::PixelValue(0.f),
+ ::arm_compute::SamplingPolicy::TOP_LEFT);
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::RNN &node)
@@ -1056,40 +916,24 @@ void KernelGenerator::visit(const ir::operation::RNN &node)
const auto activation = node.param().activation;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto hidden_state_out_tensor = _tensor_builder->at(hidden_state_out_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto hidden_state_out_tensor = _tensor_reg->getAclTensor(hidden_state_out_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
- auto weights_tensor = _tensor_builder->at(weights_index).get();
- auto recurrent_weights_tensor = _tensor_builder->at(recurrent_weights_index).get();
- auto bias_tensor = _tensor_builder->at(bias_index).get();
- auto hidden_state_in_tensor = _tensor_builder->at(hidden_state_in_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto weights_tensor = _tensor_reg->getAclTensor(weights_index).get();
+ auto recurrent_weights_tensor = _tensor_reg->getAclTensor(recurrent_weights_index).get();
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
+ auto hidden_state_in_tensor = _tensor_reg->getAclTensor(hidden_state_in_index).get();
auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
- auto copy_layer = std::make_unique<::arm_compute::NECopy>();
- copy_layer->configure(hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
+ auto copy_layer = acl_common::generateLayer<arm_compute::NECopy>(
+ hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
_return_fn = asAclFunction(std::move(copy_layer));
- auto fn = std::make_unique<::arm_compute::NERNNLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
- fn->configure(input_tensor->handle(), weights_tensor->handle(),
- recurrent_weights_tensor->handle(), bias_tensor->handle(),
- hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
- _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::RSQRT &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::NERsqrtLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
+ auto fn = acl_common::generateLayer<arm_compute::NERNNLayer>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
+ hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
_return_fn = asAclFunction(std::move(fn));
}
@@ -1105,32 +949,11 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node)
(void)dims;
(void)ndim;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
- auto fn = std::make_unique<arm_compute::NEReshapeLayer>();
- fn->configure(input_tensor->handle(), output_tensor->handle());
- auto acl_fn = asAclFunction(std::move(fn));
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Tanh &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<arm_compute::NEActivationLayer>();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Softmax &node)
@@ -1139,8 +962,8 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
const auto beta = node.param().beta;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = input_tensor->layout();
@@ -1154,14 +977,11 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
acl_common::asTensorShape(input.shape(), frontend_layout, backend_layout, false));
}
- auto fn = std::make_unique<::arm_compute::NESoftmaxLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), beta);
+ auto fn = acl_common::generateLayer<arm_compute::NESoftmaxLayer>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ output_tensor->handle(), beta);
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
@@ -1172,22 +992,19 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto block_size_tensor = _tensor_builder->at(block_size_index).get();
- auto paddings_tensor = _tensor_builder->at(paddings_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
+ auto paddings_tensor = _tensor_reg->getAclTensor(paddings_index).get();
assert(_ctx.at(block_size_index).data());
assert(_ctx.at(paddings_index).data());
- auto fn = std::make_unique<::arm_compute::NESpaceToBatchLayer>();
-
- fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
- ofm_tensor->handle());
+ auto fn = acl_common::generateLayer<arm_compute::NESpaceToBatchLayer>(
+ ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
+ ofm_tensor->handle());
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
@@ -1197,16 +1014,13 @@ void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
auto block_size = node.param().block_size;
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::NESpaceToDepthLayer>();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), block_size);
+ auto fn = acl_common::generateLayer<arm_compute::NESpaceToDepthLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), block_size);
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Split &node)
@@ -1221,10 +1035,10 @@ void KernelGenerator::visit(const ir::operation::Split &node)
for (const auto &output : node.getOutputs())
output_indexes.emplace_back(output);
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
std::vector<arm_compute::ITensor *> output_tensors;
for (const auto &ofm_ind : output_indexes)
- output_tensors.emplace_back(_tensor_builder->at(ofm_ind).get()->handle());
+ output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind).get()->handle());
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = ifm_tensor->layout();
@@ -1233,71 +1047,26 @@ void KernelGenerator::visit(const ir::operation::Split &node)
axis += ifm_rank;
axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value();
- auto fn = std::make_unique<::arm_compute::NESplit>();
-
- fn->configure(ifm_tensor->handle(), output_tensors, axis);
+ auto fn =
+ acl_common::generateLayer<arm_compute::NESplit>(ifm_tensor->handle(), output_tensors, axis);
_return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::SQRT &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
-
- auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
{
const auto ofm_index{node.getOutputs().at(0)};
const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEElementwiseSquaredDiff>();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+ auto fn = acl_common::generateLayer<arm_compute::NEElementwiseSquaredDiff>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Sub &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEArithmeticSubtraction>();
-
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
- arm_compute::ConvertPolicy::SATURATE);
-
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Slice &node)
@@ -1307,8 +1076,8 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
- auto outputData_tensor = _tensor_builder->at(output_index).get();
- auto inputData_tensor = _tensor_builder->at(input_index).get();
+ auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = inputData_tensor->layout();
@@ -1358,13 +1127,10 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
ends_set.set(i, ends[i]);
}
- auto fn = std::make_unique<::arm_compute::NESlice>();
-
- fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
+ auto fn = acl_common::generateLayer<arm_compute::NESlice>(
+ inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::StridedSlice &node)
@@ -1375,8 +1141,8 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
- auto outputData_tensor = _tensor_builder->at(output_index).get();
- auto inputData_tensor = _tensor_builder->at(input_index).get();
+ auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = inputData_tensor->layout();
@@ -1445,14 +1211,11 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
strides_set.set(i, strides[i]);
}
- auto fn = std::make_unique<::arm_compute::NEStridedSlice>();
+ auto fn = acl_common::generateLayer<arm_compute::NEStridedSlice>(
+ inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
+ begin_mask, end_mask, shrink_axis_mask);
- fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set,
- strides_set, begin_mask, end_mask, shrink_axis_mask);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::TransposeConv &node)
@@ -1481,20 +1244,17 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node)
invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
}
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto ker_tensor = _tensor_builder->at(ker_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
- auto fn = std::make_unique<::arm_compute::NETransposeConvLayer>();
+ auto fn = acl_common::generateLayer<arm_compute::NETransposeConvLayer>(
+ ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info,
+ invalid_horizontal, invalid_vertical);
- fn->configure(ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(),
- tconv_info, invalid_horizontal, invalid_vertical);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Transpose &node)
@@ -1503,8 +1263,8 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
const auto ifm_idx{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
const auto &perm{node.param().perm};
- auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
- const auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
+ const auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = ifm_tensor->layout();
@@ -1514,27 +1274,17 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
rank, pv, frontend_layout, backend_layout);
std::unique_ptr<::arm_compute::IFunction> fn;
-
if (ifm_tensor->num_dimensions() <= 2 && ofm_tensor->num_dimensions() <= 2)
{
- auto l = std::make_unique<::arm_compute::NETranspose>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NETranspose>(ifm_tensor->handle(),
+ ofm_tensor->handle());
}
else
{
- auto l = std::make_unique<::arm_compute::NEPermute>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle(), backend_pv);
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NEPermute>(ifm_tensor->handle(),
+ ofm_tensor->handle(), backend_pv);
}
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Unpack &node)
@@ -1548,25 +1298,23 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
for (const auto &output_index : node.getOutputs())
output_indexes.emplace_back(output_index);
- auto input = _tensor_builder->at(input_index).get()->handle();
+ auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
std::vector<arm_compute::ITensor *> outputs;
for (const auto &output_index : output_indexes)
- outputs.emplace_back(_tensor_builder->at(output_index)->handle());
+ outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle());
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
+ const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
if (axis < 0)
axis += input_rank;
axis = acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value();
- auto fn = std::make_unique<::arm_compute::NEUnstack>();
-
// Disable applied dim_correction
std::vector<arm_compute::TensorShape> orig_outputs_acl_tensor_shapes;
for (const auto &output_index : output_indexes)
{
size_t output_rank = _ctx.at(output_index).shape().rank();
- const auto &output_tensor = _tensor_builder->at(output_index);
+ const auto &output_tensor = _tensor_reg->getAclTensor(output_index);
orig_outputs_acl_tensor_shapes.emplace_back(output_tensor->info()->tensor_shape());
assert(output_rank == output_tensor->num_dimensions());
if (output_rank != output_tensor->info()->num_dimensions())
@@ -1577,84 +1325,23 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
}
}
- fn->configure(input, outputs, axis);
+ auto fn = acl_common::generateLayer<arm_compute::NEUnstack>(input, outputs, axis);
_return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::Add &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEArithmeticAddition>();
-
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
- arm_compute::ConvertPolicy::SATURATE);
-
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::Div &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEElementwiseDivision>();
-
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::Exp &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEExpLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
void KernelGenerator::visit(const ir::operation::ExpandDims &node)
{
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
- auto fn = std::make_unique<::arm_compute::NEReshapeLayer>();
+ auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
+ output_tensor->handle());
- fn->configure(input_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Comparison &node)
@@ -1665,56 +1352,15 @@ void KernelGenerator::visit(const ir::operation::Comparison &node)
const auto comparison_type = node.param().comparison_type;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input0_tensor = _tensor_builder->at(input0_index).get();
- auto input1_tensor = _tensor_builder->at(input1_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEElementwiseComparison>();
-
- fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
- (arm_compute::ComparisonOperation)comparison_type);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Min &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEElementwiseMin>();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input0_tensor = _tensor_reg->getAclTensor(input0_index).get();
+ auto input1_tensor = _tensor_reg->getAclTensor(input1_index).get();
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+ auto fn = acl_common::generateLayer<arm_compute::NEElementwiseComparison>(
+ input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
+ (arm_compute::ComparisonOperation)comparison_type);
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Max &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEElementwiseMax>();
-
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::OneHot &node)
@@ -1726,17 +1372,16 @@ void KernelGenerator::visit(const ir::operation::OneHot &node)
const auto offvalue_idx{node.getInputs().at(ir::operation::OneHot::Input::OFF_VALUE)};
const auto axis = node.param().axis;
- auto output_tensor = _tensor_builder->at(out_idx).get();
- auto indices_tensor = _tensor_builder->at(indices_idx).get();
- auto depth_tensor = _tensor_builder->at(depth_idx).get();
- auto onvalue_tensor = _tensor_builder->at(onvalue_idx).get();
- auto offvalue_tensor = _tensor_builder->at(offvalue_idx).get();
-
- auto fn = std::make_unique<::arm_compute::CPPOneHotEx>();
- fn->configure(indices_tensor->handle(), depth_tensor->handle(), onvalue_tensor->handle(),
- offvalue_tensor->handle(), output_tensor->handle(), axis);
- auto acl_fn = asAclFunction(std::move(fn));
- _return_fn = std::move(acl_fn);
+ auto output_tensor = _tensor_reg->getAclTensor(out_idx).get();
+ auto indices_tensor = _tensor_reg->getAclTensor(indices_idx).get();
+ auto depth_tensor = _tensor_reg->getAclTensor(depth_idx).get();
+ auto onvalue_tensor = _tensor_reg->getAclTensor(onvalue_idx).get();
+ auto offvalue_tensor = _tensor_reg->getAclTensor(offvalue_idx).get();
+
+ auto fn = acl_common::generateLayer<arm_compute::CPPOneHotEx>(
+ indices_tensor->handle(), depth_tensor->handle(), onvalue_tensor->handle(),
+ offvalue_tensor->handle(), output_tensor->handle(), axis);
+ _return_fn = asAclFunction(std::move(fn));
}
} // namespace acl_neon
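The make_unique-then-configure boilerplate removed throughout this file is folded into the acl_common::generateLayer helper that the rewritten visit() methods call. Its definition is not shown in this part of the patch; a minimal sketch of what such a helper looks like, assuming it does nothing more than forward its arguments to configure():

template <typename Layer, typename... Args>
std::unique_ptr<arm_compute::IFunction> generateLayer(Args &&... args)
{
  // build the ACL layer and configure it in a single call, as the
  // rewritten visit() methods above expect
  auto fn = std::make_unique<Layer>();
  fn->configure(std::forward<Args>(args)...);
  return fn;
}

Under that assumption, a call such as acl_common::generateLayer<arm_compute::NEUnstack>(input, outputs, axis) performs the same configure() step the removed code spelled out explicitly.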
diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.h b/runtime/onert/backend/acl_neon/KernelGenerator.h
index d6f7932b7..4d269cde5 100644
--- a/runtime/onert/backend/acl_neon/KernelGenerator.h
+++ b/runtime/onert/backend/acl_neon/KernelGenerator.h
@@ -21,6 +21,8 @@
#include "ir/Operands.h"
#include "TensorBuilder.h"
+#include "AclTensorRegistry.h"
+#include "TensorManager.h"
namespace onert
{
@@ -33,75 +35,57 @@ class KernelGenerator : public IKernelGenerator
{
public:
KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder);
+ const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg);
void visit(const ir::OpSequence &) override;
- void visit(const ir::operation::Abs &) override;
void visit(const ir::operation::ArgMax &) override;
void visit(const ir::operation::BatchToSpaceND &) override;
- void visit(const ir::operation::Cast &) override;
+ void visit(const ir::operation::BinaryArithmetic &) override;
void visit(const ir::operation::Conv2D &) override;
void visit(const ir::operation::DepthToSpace &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::Dequantize &) override;
- void visit(const ir::operation::MaxPool2D &) override;
- void visit(const ir::operation::AvgPool2D &) override;
void visit(const ir::operation::Concat &) override;
+ void visit(const ir::operation::ElementwiseActivation &) override;
+ void visit(const ir::operation::ElementwiseBinary &) override;
+ void visit(const ir::operation::ElementwiseUnary &) override;
void visit(const ir::operation::EmbeddingLookup &) override;
- void visit(const ir::operation::Floor &) override;
void visit(const ir::operation::FullyConnected &) override;
void visit(const ir::operation::Gather &) override;
void visit(const ir::operation::HashtableLookup &) override;
void visit(const ir::operation::InstanceNorm &) override;
void visit(const ir::operation::L2Normalization &) override;
- void visit(const ir::operation::L2Pool2D &) override;
void visit(const ir::operation::LocalResponseNormalization &) override;
- void visit(const ir::operation::LogicalAnd &) override;
- void visit(const ir::operation::LogicalNot &) override;
- void visit(const ir::operation::LogicalOr &) override;
- void visit(const ir::operation::Logistic &) override;
void visit(const ir::operation::LSTM &) override;
- void visit(const ir::operation::Mul &) override;
- void visit(const ir::operation::Neg &) override;
void visit(const ir::operation::Pack &) override;
void visit(const ir::operation::Pad &) override;
+ void visit(const ir::operation::Pool2D &) override;
void visit(const ir::operation::Permute &) override;
void visit(const ir::operation::PReLU &) override;
void visit(const ir::operation::Reduce &) override;
- void visit(const ir::operation::ReLU &) override;
- void visit(const ir::operation::ReLU1 &) override;
- void visit(const ir::operation::ReLU6 &) override;
void visit(const ir::operation::Reshape &) override;
void visit(const ir::operation::ResizeBilinear &) override;
void visit(const ir::operation::RNN &) override;
- void visit(const ir::operation::RSQRT &) override;
void visit(const ir::operation::Squeeze &) override;
- void visit(const ir::operation::Tanh &) override;
void visit(const ir::operation::Softmax &) override;
void visit(const ir::operation::SpaceToBatchND &) override;
void visit(const ir::operation::SpaceToDepth &) override;
void visit(const ir::operation::Split &) override;
- void visit(const ir::operation::SQRT &) override;
void visit(const ir::operation::SquaredDifference &) override;
- void visit(const ir::operation::Sub &) override;
void visit(const ir::operation::Slice &) override;
void visit(const ir::operation::StridedSlice &) override;
void visit(const ir::operation::TransposeConv &) override;
void visit(const ir::operation::Transpose &) override;
void visit(const ir::operation::Unpack &) override;
- void visit(const ir::operation::Add &) override;
- void visit(const ir::operation::Div &) override;
- void visit(const ir::operation::Exp &) override;
void visit(const ir::operation::ExpandDims &) override;
void visit(const ir::operation::Comparison &) override;
- void visit(const ir::operation::Min &) override;
- void visit(const ir::operation::Max &) override;
void visit(const ir::operation::OneHot &) override;
private:
const ir::Operands &_ctx;
const ir::Operations &_operations_ctx;
std::shared_ptr<TensorBuilder> _tensor_builder;
+ std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg;
ir::Layout _current_op_seq_layout;
};
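The new constructor parameter wires in acl_common::AclTensorRegistry, whose getAclTensor() lookups replace _tensor_builder->at() in the .cc diff above. Its contents are not shown in this section; a rough sketch, assuming it is a thin adaptor over the backend's TensorManager (member names here are illustrative):

template <typename T_AclTensorManager>
class AclTensorRegistry : public ITensorRegistry
{
public:
  AclTensorRegistry(T_AclTensorManager *tensor_mgr) : _tensor_mgr{tensor_mgr} {}

  // the only call the kernel generator relies on: map an operand index to
  // the backend tensor created for it
  auto getAclTensor(const ir::OperandIndex &ind) { return _tensor_mgr->at(ind); }

private:
  T_AclTensorManager *_tensor_mgr;
};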
diff --git a/runtime/onert/backend/acl_neon/Optimizer.cc b/runtime/onert/backend/acl_neon/Optimizer.cc
index 2948cab09..ac80901cc 100644
--- a/runtime/onert/backend/acl_neon/Optimizer.cc
+++ b/runtime/onert/backend/acl_neon/Optimizer.cc
@@ -19,7 +19,7 @@
#include "ParentInfo.h"
#include <cassert>
-#include <ir/LoweredGraph.h>
+#include <compiler/LoweredGraph.h>
#include <util/logging.h>
#include "AclSubTensorAnalyzer.h"
diff --git a/runtime/onert/backend/acl_neon/TensorManager.h b/runtime/onert/backend/acl_neon/TensorManager.h
index 3ec9efa8f..3b7cfbcfd 100644
--- a/runtime/onert/backend/acl_neon/TensorManager.h
+++ b/runtime/onert/backend/acl_neon/TensorManager.h
@@ -55,7 +55,7 @@ using InternalBufferManager = acl_common::AclInternalBufferManager<
using TensorManager = acl_common::AclTensorManager<acl_neon::operand::INETensor, operand::NETensor,
operand::NESubTensor>;
-TensorManager *createTensorManager(bool is_linear_executor)
+inline TensorManager *createTensorManager(bool is_linear_executor)
{
if (is_linear_executor)
{
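The only change in this hunk is the added inline keyword: createTensorManager is defined in a header, so without inline every translation unit that includes TensorManager.h emits its own definition and the final link fails with a multiple-definition error. A tiny standalone illustration of the rule, with hypothetical names not taken from this patch:

// util.h (hypothetical)
#pragma once
inline int twice(int x) { return 2 * x; }  // 'inline' lets this definition appear in every
                                           // .cc that includes the header; drop it and the
                                           // linker reports a duplicate symbol instead.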
diff --git a/runtime/onert/backend/cpu/Backend.h b/runtime/onert/backend/cpu/Backend.h
index 56bd352e0..fc8574b26 100644
--- a/runtime/onert/backend/cpu/Backend.h
+++ b/runtime/onert/backend/cpu/Backend.h
@@ -47,10 +47,12 @@ public:
const auto &operands = graph.operands();
const auto &operations = graph.operations();
auto context = std::make_unique<BackendContext>(this, &graph);
- auto tb = std::make_shared<TensorBuilder>();
+ auto tr = std::make_shared<cpu_common::TensorRegistry>();
+ auto tb = std::make_shared<TensorBuilder>(tr);
+ context->tensor_registry = tr;
context->tensor_builder = tb;
- context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb);
- context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, kb,
+ context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
+ context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb,
context->external_context());
context->tensor_register = nullptr;
context->optimizer = nullptr;
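The registry is created once and shared by the builder, the constant initializer and the kernel generator, so all of them resolve operand indexes against the same table; cpu_common::TensorRegistry implements ITensorRegistry, which is why the same object can be stored in context->tensor_registry and passed where the interface is expected. Reduced to its essentials (names as in the hunk above):

auto tr = std::make_shared<cpu_common::TensorRegistry>();  // single lookup table for this backend
auto tb = std::make_shared<TensorBuilder>(tr);             // builder registers tensors into it
// consumers only read from the shared registry:
auto ci = std::make_shared<ConstantInitializer>(operands, tr);
auto kg = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb,
                                            context->external_context());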
diff --git a/runtime/onert/backend/cpu/BackendContext.h b/runtime/onert/backend/cpu/BackendContext.h
index f314a8e39..e90b21054 100644
--- a/runtime/onert/backend/cpu/BackendContext.h
+++ b/runtime/onert/backend/cpu/BackendContext.h
@@ -31,13 +31,15 @@ class BackendContext : public onert::backend::BackendContext
{
public:
BackendContext(const Backend *backend, const ir::Graph *graph,
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
std::shared_ptr<ITensorBuilder> tensor_builder = nullptr,
std::shared_ptr<IConstantInitializer> constant_initializer = nullptr,
std::shared_ptr<IKernelGenerator> kernel_gen = nullptr,
std::shared_ptr<ITensorRegister> tensor_register = nullptr,
std::shared_ptr<IOptimizer> optimizer = nullptr)
- : onert::backend::BackendContext(backend, graph, tensor_builder, constant_initializer,
- kernel_gen, tensor_register, optimizer),
+ : onert::backend::BackendContext(backend, graph, tensor_registry, tensor_builder,
+ constant_initializer, kernel_gen, tensor_register,
+ optimizer),
_external_context(new ExternalContext)
{
}
diff --git a/runtime/onert/backend/cpu/ConstantInitializer.cc b/runtime/onert/backend/cpu/ConstantInitializer.cc
index deb27f0fe..6f6eb77bc 100644
--- a/runtime/onert/backend/cpu/ConstantInitializer.cc
+++ b/runtime/onert/backend/cpu/ConstantInitializer.cc
@@ -25,8 +25,8 @@ namespace cpu
{
ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
- : IConstantInitializer{operands}, _tensor_builder{tensor_builder}
+ const std::shared_ptr<ITensorRegistry> &tensor_reg)
+ : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
{
// DO NOTHING
}
diff --git a/runtime/onert/backend/cpu/ConstantInitializer.h b/runtime/onert/backend/cpu/ConstantInitializer.h
index de03a693a..c016c83bc 100644
--- a/runtime/onert/backend/cpu/ConstantInitializer.h
+++ b/runtime/onert/backend/cpu/ConstantInitializer.h
@@ -17,7 +17,7 @@
#ifndef __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__
#define __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__
-#include "TensorBuilder.h"
+#include "backend/cpu_common/TensorRegistry.h"
#include <backend/IConstantInitializer.h>
#include <ir/Operands.h>
@@ -33,7 +33,7 @@ class ConstantInitializer : public IConstantInitializer
{
public:
ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder);
+ const std::shared_ptr<ITensorRegistry> &tensor_reg);
public:
void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj) override;
@@ -50,10 +50,10 @@ public:
void visit(const ir::operation::FullyConnected &) override;
private:
- std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
+ std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; }
private:
- std::shared_ptr<TensorBuilder> _tensor_builder;
+ std::shared_ptr<ITensorRegistry> _tensor_reg;
};
} // namespace cpu
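The header now depends only on the ITensorRegistry interface (via backend/cpu_common/TensorRegistry.h) rather than on the concrete TensorBuilder. In the kernel generator diff that follows, the two registry getters are used with slightly different intent; roughly, and assuming the usual cpu_common semantics:

// any tensor known for this index, exposed through the portable interface
auto portable = _tensor_reg->getPortableTensor(index).get();
// only a tensor this backend allocated itself (may be null); used below for ref counting
auto native = _tensor_reg->getNativeTensor(index);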
diff --git a/runtime/onert/backend/cpu/KernelGenerator.cc b/runtime/onert/backend/cpu/KernelGenerator.cc
index 7939fe894..74b6f0c6b 100644
--- a/runtime/onert/backend/cpu/KernelGenerator.cc
+++ b/runtime/onert/backend/cpu/KernelGenerator.cc
@@ -16,49 +16,36 @@
#include "KernelGenerator.h"
-#include "ops/AbsLayer.h"
-#include "ops/AddLayer.h"
#include "ops/ArgMinMaxLayer.h"
-#include "ops/AvgPoolLayer.h"
#include "ops/BatchToSpaceNDLayer.h"
-#include "ops/CastLayer.h"
+#include "ops/BinaryArithmeticLayer.h"
#include "ops/CompareLayer.h"
#include "ops/ConcatLayer.h"
#include "ops/ConvolutionLayer.h"
-#include "ops/CosLayer.h"
#include "ops/DepthwiseConvolutionLayer.h"
-#include "ops/DivLayer.h"
#include "ops/EinsumLayer.h"
-#include "ops/ExpLayer.h"
+#include "ops/ElementwiseActivationLayer.h"
+#include "ops/ElementwiseBinaryLayer.h"
+#include "ops/ElementwiseUnaryLayer.h"
#include "ops/ExpandDimsLayer.h"
#include "ops/FillLayer.h"
#include "ops/FullyConnectedLayer.h"
#include "ops/GatherLayer.h"
-#include "ops/LogLayer.h"
-#include "ops/LogisticLayer.h"
-#include "ops/MaxLayer.h"
-#include "ops/MaxPoolLayer.h"
#include "ops/MeanLayer.h"
-#include "ops/MinLayer.h"
-#include "ops/MulLayer.h"
-#include "ops/NegLayer.h"
#include "ops/OneHotLayer.h"
#include "ops/OperationUtils.h"
#include "ops/PackLayer.h"
#include "ops/PadLayer.h"
+#include "ops/PoolLayer.h"
#include "ops/PowLayer.h"
#include "ops/RangeLayer.h"
+#include "ops/RankLayer.h"
#include "ops/ReduceLayer.h"
-#include "ops/ReLULayer.h"
-#include "ops/ReLU6Layer.h"
#include "ops/ReshapeLayer.h"
#include "ops/ResizeBilinearLayer.h"
#include "ops/ReverseLayer.h"
-#include "ops/RoundLayer.h"
-#include "ops/RsqrtLayer.h"
#include "ops/SelectLayer.h"
#include "ops/ShapeLayer.h"
-#include "ops/SinLayer.h"
#include "ops/SliceLayer.h"
#include "ops/SoftMaxLayer.h"
#include "ops/StridedSliceLayer.h"
@@ -66,22 +53,16 @@
#include "ops/SpaceToDepthLayer.h"
#include "ops/SplitLayer.h"
#include "ops/SplitVLayer.h"
-#include "ops/SubLayer.h"
-#include "ops/TanhLayer.h"
#include "ops/TileLayer.h"
#include "ops/TransposeLayer.h"
#include "ops/UnpackLayer.h"
-#include "ops/LogicalNotLayer.h"
-#include "ops/ZerosLikeLayer.h"
#include "ops/SquaredDiffLayer.h"
-#include "ops/LogicalOrLayer.h"
#include "ops/L2NormLayer.h"
#include "ops/MatrixBandPartLayer.h"
#include "ops/BatchMatMulLayer.h"
#include "ops/BroadcastToLayer.h"
#include "ops/FusedBatchNormLayer.h"
#include "ops/LogSoftMaxLayer.h"
-#include "ops/QuantizeLayer.h"
#include "ops/StatelessRandomUniformLayer.h"
#include <backend/Backend.h>
@@ -102,6 +83,104 @@ namespace cpu
namespace
{
+ops::ArithmeticType
+convertArithmeticType(ir::operation::BinaryArithmetic::ArithmeticType arithmetic_type_ir)
+{
+ switch (arithmetic_type_ir)
+ {
+ case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
+ return ops::ArithmeticType::kAdd;
+ case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
+ return ops::ArithmeticType::kSub;
+ case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
+ return ops::ArithmeticType::kMul;
+ case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
+ return ops::ArithmeticType::kDiv;
+ default:
+ throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+ }
+}
+
+ops::ElementwiseActivationType
+convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type_ir)
+{
+ switch (type_ir)
+ {
+ case ir::operation::ElementwiseActivation::Type::LOGISTIC:
+ return ops::ElementwiseActivationType::kLogistic;
+ case ir::operation::ElementwiseActivation::Type::RELU:
+ return ops::ElementwiseActivationType::kReLU;
+ case ir::operation::ElementwiseActivation::Type::TANH:
+ return ops::ElementwiseActivationType::kTanh;
+ default:
+ throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+ }
+}
+
+ops::ElementwiseBinaryType
+convertElementwiseBinaryType(ir::operation::ElementwiseBinary::ElementwiseBinaryType type_ir)
+{
+ switch (type_ir)
+ {
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
+ return ops::ElementwiseBinaryType::kLogicalOr;
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
+ return ops::ElementwiseBinaryType::kMax;
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
+ return ops::ElementwiseBinaryType::kMin;
+ default:
+ throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+ }
+}
+
+ops::ElementwiseUnaryType convertElementwiseUnaryType(ir::operation::ElementwiseUnary::Type type_ir)
+{
+ switch (type_ir)
+ {
+ case ir::operation::ElementwiseUnary::Type::ABS:
+ return ops::ElementwiseUnaryType::kAbs;
+ case ir::operation::ElementwiseUnary::Type::CAST:
+ return ops::ElementwiseUnaryType::kCast;
+ case ir::operation::ElementwiseUnary::Type::COS:
+ return ops::ElementwiseUnaryType::kCos;
+ case ir::operation::ElementwiseUnary::Type::ERF:
+ return ops::ElementwiseUnaryType::kErf;
+ case ir::operation::ElementwiseUnary::Type::EXP:
+ return ops::ElementwiseUnaryType::kExp;
+ case ir::operation::ElementwiseUnary::Type::LOG:
+ return ops::ElementwiseUnaryType::kLog;
+ case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
+ return ops::ElementwiseUnaryType::kLogicalNot;
+ case ir::operation::ElementwiseUnary::Type::NEG:
+ return ops::ElementwiseUnaryType::kNeg;
+ case ir::operation::ElementwiseUnary::Type::QUANTIZE:
+ return ops::ElementwiseUnaryType::kQuantize;
+ case ir::operation::ElementwiseUnary::Type::ROUND:
+ return ops::ElementwiseUnaryType::kRound;
+ case ir::operation::ElementwiseUnary::Type::RSQRT:
+ return ops::ElementwiseUnaryType::kRSqrt;
+ case ir::operation::ElementwiseUnary::Type::SIN:
+ return ops::ElementwiseUnaryType::kSin;
+ case ir::operation::ElementwiseUnary::Type::ZEROS_LIKE:
+ return ops::ElementwiseUnaryType::kZerosLike;
+ default:
+ throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+ }
+}
+
+ops::PoolType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
+{
+ switch (type_ir)
+ {
+ case ir::operation::Pool2D::PoolType::AVG:
+ return ops::PoolType::kAvg;
+ case ir::operation::Pool2D::PoolType::MAX:
+ return ops::PoolType::kMax;
+ default:
+ throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+ }
+}
+
ops::ReduceType convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir)
{
switch (reduce_type_ir)
@@ -127,11 +206,12 @@ ops::ReduceType convertReduceType(ir::operation::Reduce::ReduceType reduce_type_
KernelGenerator::KernelGenerator(
const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
const std::shared_ptr<ExternalContext> &external_context)
: _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
- _kernel_builder(kernel_builder), _current_op_seq_layout(ir::Layout::UNKNOWN),
- _external_context(external_context)
+ _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
+ _current_op_seq_layout(ir::Layout::UNKNOWN), _external_context(external_context)
{
// DO NOTHING
}
@@ -140,11 +220,9 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
{
assert(!_return_fn_seq);
assert(_tensor_builder->dynamicTensorManager());
- assert(_tensor_builder->tensorRegistry());
+ assert(_tensor_reg);
- auto dyn_tensor_manager = _tensor_builder->dynamicTensorManager();
- auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(
- _ctx, dyn_tensor_manager, _tensor_builder->tensorRegistry());
+ auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
_return_fn_seq = std::make_unique<exec::FunctionSequence>();
@@ -154,7 +232,7 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
dyn_ctx->op_seq = &op_seq;
dyn_ctx->operations = &_operations_ctx;
dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
- dyn_ctx->tensor_registry = _tensor_builder->tensorRegistry();
+ dyn_ctx->tensor_registry = _tensor_reg;
dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();
_return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
@@ -170,13 +248,13 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
{
- auto portable_tensor = _tensor_builder->portableAt(ind);
+ auto portable_tensor = _tensor_reg->getPortableTensor(ind);
if (portable_tensor)
{
assert(portable_tensor->layout() == ir::Layout::NHWC);
}
- auto tensor = _tensor_builder->at(ind);
+ auto tensor = _tensor_reg->getNativeTensor(ind);
if (tensor)
{
tensor->increase_ref();
@@ -194,21 +272,23 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
- auto ker_tensor = _tensor_builder->portableAt(ker_index).get();
- auto bias_tensor = _tensor_builder->portableAt(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
+ auto ker_tensor = _tensor_reg->getPortableTensor(ker_index).get();
+ auto bias_tensor = _tensor_reg->getPortableTensor(bias_index).get();
const auto stride = node.param().stride;
const auto activation = node.param().activation;
const auto param_padding = node.param().padding;
+ const auto dilation = node.param().dilation;
auto fn = std::make_unique<ops::ConvolutionLayer>();
if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic())
{
fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left,
param_padding.param.right, param_padding.param.top, param_padding.param.bottom,
- stride.horizontal, stride.vertical, activation, ofm_tensor);
+ stride.horizontal, stride.vertical, dilation.width_factor, dilation.height_factor,
+ activation, ofm_tensor);
_return_fn = std::move(fn);
return;
@@ -221,11 +301,12 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
const auto ker_width = ker_shape.dim(2);
const auto padding =
- ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height);
+ ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ dilation.width_factor, dilation.height_factor);
fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
- activation, ofm_tensor);
+ dilation.width_factor, dilation.height_factor, activation, ofm_tensor);
_return_fn = std::move(fn);
}
@@ -251,10 +332,10 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
const auto multiplier = node.param().multiplier;
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
- auto ker_tensor = _tensor_builder->portableAt(ker_index).get();
- auto bias_tensor = _tensor_builder->portableAt(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
+ auto ker_tensor = _tensor_reg->getPortableTensor(ker_index).get();
+ auto bias_tensor = _tensor_reg->getPortableTensor(bias_index).get();
auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>();
@@ -265,57 +346,6 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::MaxPool2D::Input::INPUT)};
-
- const auto kh = node.param().kh;
- const auto kw = node.param().kw;
-
- const auto stride = node.param().stride;
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
- const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::MaxPoolLayer>();
-
- fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
- stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)};
-
- const auto kh = node.param().kh;
- const auto kw = node.param().kw;
- const auto stride = node.param().stride;
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
- const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::AvgPoolLayer>();
-
- fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
- stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
void KernelGenerator::visit(const ir::operation::Concat &node)
{
const auto ofm_index{node.getOutputs().at(0)};
@@ -323,11 +353,11 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
const auto rank = _ctx.at(ofm_index).shape().rank();
const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
- auto output_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
std::vector<const IPortableTensor *> input_tensors;
for (auto &ifm_idx : node.getInputs())
- input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get());
+ input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
auto fn = std::make_unique<ops::ConcatLayer>();
@@ -342,9 +372,9 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
const auto input_index{node.getInputs().at(ir::operation::BatchToSpaceND::INPUT)};
const auto block_size_index{node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto block_size_alloc = _tensor_builder->portableAt(block_size_index).get();
+ auto output_alloc = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_alloc = _tensor_reg->getPortableTensor(input_index).get();
+ auto block_size_alloc = _tensor_reg->getPortableTensor(block_size_index).get();
auto fn = std::make_unique<ops::BatchToSpaceNDLayer>();
@@ -354,7 +384,7 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
if (node.getInputs().size() != NNApiInputs)
{
const auto crops_data_index{node.getInputs().at(ir::operation::BatchToSpaceND::CROPS_DATA)};
- crops_alloc = _tensor_builder->portableAt(crops_data_index).get();
+ crops_alloc = _tensor_reg->getPortableTensor(crops_data_index).get();
}
fn->configure(input_alloc, output_alloc, block_size_alloc, crops_alloc);
@@ -368,9 +398,9 @@ void KernelGenerator::visit(const ir::operation::Fill &node)
const auto input_index{node.getInputs().at(ir::operation::Fill::Input::INPUT)};
const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto value_tensor = _tensor_builder->portableAt(value_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto value_tensor = _tensor_reg->getPortableTensor(value_index).get();
auto fn = std::make_unique<ops::FillLayer>();
@@ -389,11 +419,11 @@ void KernelGenerator::visit(const ir::operation::FullyConnected &node)
const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
const auto activation = node.param().activation;
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto weight_tensor = _tensor_builder->portableAt(weight_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto weight_tensor = _tensor_reg->getPortableTensor(weight_index).get();
auto bias_tensor =
- bias_index.undefined() ? nullptr : _tensor_builder->portableAt(bias_index).get();
+ bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index).get();
auto fn = std::make_unique<ops::FullyConnectedLayer>();
@@ -408,8 +438,8 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
// optional 2nd input
IPortableTensor *shape_tensor = nullptr;
@@ -417,7 +447,7 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
if (node.getInputs().size() == 2)
{
const auto shape_index{node.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
- shape_tensor = _tensor_builder->portableAt(shape_index).get();
+ shape_tensor = _tensor_reg->getPortableTensor(shape_index).get();
}
auto fn = std::make_unique<ops::ReshapeLayer>();
@@ -431,8 +461,8 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
// Squeeze can share same kernel with reshape
auto fn = std::make_unique<ops::ReshapeLayer>();
@@ -449,8 +479,8 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
const auto beta = node.param().beta;
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
auto fn = std::make_unique<ops::SoftMaxLayer>();
@@ -459,21 +489,22 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Add &node)
+void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
+ const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
- auto fn = std::make_unique<ops::AddLayer>();
+ auto fn = std::make_unique<ops::BinaryArithmeticLayer>();
- fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor);
+ fn->configure(lhs_tensor, rhs_tensor, ofm_tensor, activation,
+ convertArithmeticType(node.param().arithmetic_type));
_return_fn = std::move(fn);
}
@@ -484,9 +515,9 @@ void KernelGenerator::visit(const ir::operation::Comparison &node)
const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
auto comparison_type = node.param().comparison_type;
@@ -503,9 +534,9 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto indices_tensor = _tensor_builder->portableAt(indices_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto indices_tensor = _tensor_reg->getPortableTensor(indices_index).get();
const auto backend_layout = output_tensor->layout();
UNUSED_RELEASE(backend_layout);
@@ -534,46 +565,6 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Sub &node)
-{
- // The same as Add
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
-
- auto fn = std::make_unique<ops::SubLayer>();
-
- fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Mul &node)
-{
- // The same as Add
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
-
- auto fn = std::make_unique<ops::MulLayer>();
-
- fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
void KernelGenerator::visit(const ir::operation::OneHot &node)
{
const auto output_index{node.getOutputs().at(0)};
@@ -584,11 +575,11 @@ void KernelGenerator::visit(const ir::operation::OneHot &node)
const auto axis = node.param().axis;
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto indices_tensor = _tensor_builder->portableAt(indices_index).get();
- auto depth_tensor = _tensor_builder->portableAt(depth_index).get();
- auto onvalue_tensor = _tensor_builder->portableAt(onvalue_index).get();
- auto offvalue_tensor = _tensor_builder->portableAt(offvalue_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto indices_tensor = _tensor_reg->getPortableTensor(indices_index).get();
+ auto depth_tensor = _tensor_reg->getPortableTensor(depth_index).get();
+ auto onvalue_tensor = _tensor_reg->getPortableTensor(onvalue_index).get();
+ auto offvalue_tensor = _tensor_reg->getPortableTensor(offvalue_index).get();
assert(indices_tensor->data_type() == OperandType::INT32);
assert(axis <= static_cast<int>(indices_tensor->num_dimensions()));
@@ -600,34 +591,14 @@ void KernelGenerator::visit(const ir::operation::OneHot &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Div &node)
-{
- // The same as Add
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
-
- auto fn = std::make_unique<ops::DivLayer>();
-
- fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
void KernelGenerator::visit(const ir::operation::Einsum &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- auto output_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
std::vector<const IPortableTensor *> input_tensors;
for (auto &ifm_idx : node.getInputs())
- input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get());
+ input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
const auto equation = node.param().equation;
@@ -648,7 +619,7 @@ void KernelGenerator::visit(const ir::operation::Custom &node)
const auto &operand = _ctx.at(idx);
// TODO make sure using `_current_op_seq_layout` is correct for custom operations
types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()});
- auto in_tensor = _tensor_builder->portableAt(idx);
+ auto in_tensor = _tensor_reg->getPortableTensor(idx);
tensors.emplace_back(in_tensor);
}
};
@@ -666,64 +637,68 @@ void KernelGenerator::visit(const ir::operation::Custom &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Exp &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
+ const auto input_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
- auto fn = std::make_unique<ops::ExpLayer>();
+ auto fn = std::make_unique<ops::ElementwiseActivationLayer>();
- fn->configure(input_tensor, output_tensor);
+ fn->configure(input_tensor, output_tensor, node.param().alpha, node.param().beta,
+ convertElementwiseActivationType(node.param().op_type));
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::ExpandDims &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
- const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
+ const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto axis_tensor = _tensor_builder->portableAt(axis_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
- auto fn = std::make_unique<ops::ExpandDimsLayer>();
+ auto fn = std::make_unique<ops::ElementwiseBinaryLayer>();
- fn->configure(input_tensor, axis_tensor, output_tensor);
+ fn->configure(lhs_tensor, rhs_tensor, output_tensor,
+ convertElementwiseBinaryType(node.param().op_type));
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Logistic &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
+ const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
- auto fn = std::make_unique<ops::LogisticLayer>();
+ auto fn = std::make_unique<ops::ElementwiseUnaryLayer>();
- fn->configure(input_tensor, output_tensor);
+ fn->configure(input_tensor, output_tensor, convertElementwiseUnaryType(node.param().op_type));
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Tanh &node)
+void KernelGenerator::visit(const ir::operation::ExpandDims &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
+ const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
+ const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto axis_tensor = _tensor_reg->getPortableTensor(axis_index).get();
- auto fn = std::make_unique<ops::TanhLayer>();
+ auto fn = std::make_unique<ops::ExpandDimsLayer>();
- fn->configure(input_tensor, output_tensor);
+ fn->configure(input_tensor, axis_tensor, output_tensor);
_return_fn = std::move(fn);
}
@@ -737,11 +712,11 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
assert(-rank <= axis && axis < rank);
- auto output_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
std::vector<const IPortableTensor *> input_tensors;
for (auto &ifm_idx : node.getInputs())
- input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get());
+ input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
auto fn = std::make_unique<ops::PackLayer>();
@@ -759,11 +734,11 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
assert(rank == 0 || (-rank <= axis && axis < rank));
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
std::vector<IPortableTensor *> output_tensors;
for (auto &output_idx : node.getOutputs())
- output_tensors.emplace_back(_tensor_builder->portableAt(output_idx).get());
+ output_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get());
auto fn = std::make_unique<ops::UnpackLayer>();
@@ -781,8 +756,8 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
const auto output_index{node.getOutputs().at(0)};
assert(_ctx.at(pad_index).data());
- auto input = _tensor_builder->portableAt(input_index).get();
- auto output = _tensor_builder->portableAt(output_index).get();
+ auto input = _tensor_reg->getPortableTensor(input_index).get();
+ auto output = _tensor_reg->getPortableTensor(output_index).get();
auto pad_rank = _ctx.at(pad_index).shape().dim(0);
auto pad_base = reinterpret_cast<const int32_t *>(_ctx.at(pad_index).data()->base());
@@ -801,62 +776,13 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Max &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
-
- auto fn = std::make_unique<ops::MaxLayer>();
-
- fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Min &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
-
- auto fn = std::make_unique<ops::MinLayer>();
-
- fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Cast &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::CastLayer>();
-
- fn->configure(ifm_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
void KernelGenerator::visit(const ir::operation::Transpose &node)
{
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
auto fn = std::make_unique<ops::TransposeLayer>();
@@ -872,9 +798,9 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
const auto keep_dims = node.param().keep_dims;
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto axes_tensor = _tensor_builder->portableAt(axes_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto axes_tensor = _tensor_reg->getPortableTensor(axes_index).get();
if (node.param().reduce_type == ir::operation::Reduce::ReduceType::MEAN)
{
@@ -895,36 +821,6 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
}
}
-void KernelGenerator::visit(const ir::operation::ReLU &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(0)};
-
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
-
- auto fn = std::make_unique<ops::ReLULayer>();
-
- fn->configure(input_tensor, output_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU6 &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(0)};
-
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
-
- auto fn = std::make_unique<ops::ReLU6Layer>();
-
- fn->configure(input_tensor, output_tensor);
-
- _return_fn = std::move(fn);
-}
-
void KernelGenerator::visit(const ir::operation::Select &node)
{
const auto output_index{node.getOutputs().at(0)};
@@ -932,10 +828,10 @@ void KernelGenerator::visit(const ir::operation::Select &node)
const auto true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
const auto false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto condition_tensor = _tensor_builder->portableAt(condition_index).get();
- auto true_tensor = _tensor_builder->portableAt(true_index).get();
- auto false_tensor = _tensor_builder->portableAt(false_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto condition_tensor = _tensor_reg->getPortableTensor(condition_index).get();
+ auto true_tensor = _tensor_reg->getPortableTensor(true_index).get();
+ auto false_tensor = _tensor_reg->getPortableTensor(false_index).get();
auto fn = std::make_unique<ops::SelectLayer>();
@@ -951,10 +847,10 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto begins_tensor = _tensor_builder->portableAt(begins_index).get();
- auto sizes_tensor = _tensor_builder->portableAt(sizes_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto begins_tensor = _tensor_reg->getPortableTensor(begins_index).get();
+ auto sizes_tensor = _tensor_reg->getPortableTensor(sizes_index).get();
auto fn = std::make_unique<ops::SliceLayer>();
@@ -971,11 +867,11 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto starts_tensor = _tensor_builder->portableAt(starts_index).get();
- auto ends_tensor = _tensor_builder->portableAt(ends_index).get();
- auto strides_tensor = _tensor_builder->portableAt(strides_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto starts_tensor = _tensor_reg->getPortableTensor(starts_index).get();
+ auto ends_tensor = _tensor_reg->getPortableTensor(ends_index).get();
+ auto strides_tensor = _tensor_reg->getPortableTensor(strides_index).get();
auto begin_mask = node.param().begin_mask;
auto end_mask = node.param().end_mask;
@@ -999,11 +895,11 @@ void KernelGenerator::visit(const ir::operation::Split &node)
const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
auto axis_resolved = axis < 0 ? axis + rank : axis;
- auto in_tensor = _tensor_builder->portableAt(input_idx).get();
+ auto in_tensor = _tensor_reg->getPortableTensor(input_idx).get();
std::vector<IPortableTensor *> out_tensors;
for (auto &output_idx : node.getOutputs())
- out_tensors.emplace_back(_tensor_builder->portableAt(output_idx).get());
+ out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get());
auto fn = std::make_unique<ops::SplitLayer>();
@@ -1012,73 +908,13 @@ void KernelGenerator::visit(const ir::operation::Split &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Abs &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::AbsLayer>();
-
- fn->configure(ifm_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Sin &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Sin::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::SinLayer>();
-
- fn->configure(ifm_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Cos &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Cos::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::CosLayer>();
-
- fn->configure(ifm_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::RSQRT &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::RsqrtLayer>();
-
- fn->configure(ifm_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
void KernelGenerator::visit(const ir::operation::Shape &node)
{
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
auto fn = std::make_unique<ops::ShapeLayer>();
@@ -1097,8 +933,8 @@ void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
auto align_corners = node.param().align_corners;
auto half_pixel_centers = node.param().half_pixel_centers;
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
auto fn = std::make_unique<ops::ResizeBilinearLayer>();
@@ -1114,9 +950,9 @@ void KernelGenerator::visit(const ir::operation::Reverse &node)
const auto input_index{node.getInputs().at(ir::operation::Reverse::INPUT)};
const auto axis_index{node.getInputs().at(ir::operation::Reverse::AXIS)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto axis_tensor = _tensor_builder->portableAt(axis_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto axis_tensor = _tensor_reg->getPortableTensor(axis_index).get();
auto fn = std::make_unique<ops::ReverseLayer>();
@@ -1125,21 +961,6 @@ void KernelGenerator::visit(const ir::operation::Reverse &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Neg &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::NegLayer>();
-
- fn->configure(ifm_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
void KernelGenerator::visit(const ir::operation::ArgMax &node)
{
const auto output_index{node.getOutputs().at(0)};
@@ -1147,8 +968,8 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
const auto axis = node.param().axis;
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
auto fn = std::make_unique<ops::ArgMinMaxLayer>();
@@ -1157,81 +978,45 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Pow &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)};
-
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
-
- auto fn = std::make_unique<ops::PowLayer>();
-
- fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Log &node)
+void KernelGenerator::visit(const ir::operation::Pool2D &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Log::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::LogLayer>();
-
- fn->configure(ifm_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
+ const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};
-void KernelGenerator::visit(const ir::operation::Round &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Round::INPUT)};
+ const auto kh = node.param().kh;
+ const auto kw = node.param().kw;
+ const auto stride = node.param().stride;
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto padding =
+ ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ const auto activation = node.param().activation;
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
- auto fn = std::make_unique<ops::RoundLayer>();
+ auto fn = std::make_unique<ops::PoolLayer>();
- fn->configure(input_tensor, output_tensor);
+ fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
+ stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor,
+ convertPoolType(node.param().op_type));
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::LogicalNot &node)
+void KernelGenerator::visit(const ir::operation::Pow &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::LogicalNot::INPUT)};
-
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
-
- auto fn = std::make_unique<ops::LogicalNotLayer>();
-
- fn->configure(input_tensor, output_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalOr &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(0)};
- const auto rhs_index{node.getInputs().at(1)};
+ const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)};
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
- auto fn = std::make_unique<ops::LogicalOrLayer>();
+ auto fn = std::make_unique<ops::PowLayer>();
- fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
+ fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor);
_return_fn = std::move(fn);
}
@@ -1241,8 +1026,8 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(0)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_alloc = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_alloc = _tensor_reg->getPortableTensor(input_index).get();
auto fn = std::make_unique<ops::L2NormLayer>();
@@ -1251,35 +1036,36 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::ZerosLike &node)
+void KernelGenerator::visit(const ir::operation::Range &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ZerosLike::INPUT)};
+ const auto start_index{node.getInputs().at(ir::operation::Range::START)};
+ const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)};
+ const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto start_tensor = _tensor_reg->getPortableTensor(start_index).get();
+ auto limit_tensor = _tensor_reg->getPortableTensor(limit_index).get();
+ auto delta_tensor = _tensor_reg->getPortableTensor(delta_index).get();
- auto fn = std::make_unique<ops::ZerosLikeLayer>();
+ auto fn = std::make_unique<ops::RangeLayer>();
- fn->configure(input_tensor, output_tensor);
+ fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor);
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Range &node)
+void KernelGenerator::visit(const ir::operation::Rank &node)
{
- const auto output_index{node.getOutputs().at(0)};
- const auto start_index{node.getInputs().at(ir::operation::Range::START)};
- const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)};
- const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)};
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::Rank::Input::INPUT)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto start_tensor = _tensor_builder->portableAt(start_index).get();
- auto limit_tensor = _tensor_builder->portableAt(limit_index).get();
- auto delta_tensor = _tensor_builder->portableAt(delta_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
- auto fn = std::make_unique<ops::RangeLayer>();
+ auto fn = std::make_unique<ops::RankLayer>();
+
+ fn->configure(ifm_tensor, ofm_tensor);
- fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor);
_return_fn = std::move(fn);
}
@@ -1289,9 +1075,9 @@ void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
auto fn = std::make_unique<ops::SqDiffLayer>();
@@ -1305,9 +1091,9 @@ void KernelGenerator::visit(const ir::operation::Tile &node)
const auto input_index{node.getInputs().at(ir::operation::Tile::INPUT)};
const auto multiples_index{node.getInputs().at(ir::operation::Tile::MULTIPLES)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto multiples_tensor = _tensor_builder->portableAt(multiples_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto multiples_tensor = _tensor_reg->getPortableTensor(multiples_index).get();
auto fn = std::make_unique<ops::TileLayer>();
@@ -1322,10 +1108,10 @@ void KernelGenerator::visit(const ir::operation::MatrixBandPart &node)
const auto num_lower_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_LOWER_DIAG)};
const auto num_upper_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_UPPER_DIAG)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto num_lower_tensor = _tensor_builder->portableAt(num_lower_index).get();
- auto num_upper_tensor = _tensor_builder->portableAt(num_upper_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto num_lower_tensor = _tensor_reg->getPortableTensor(num_lower_index).get();
+ auto num_upper_tensor = _tensor_reg->getPortableTensor(num_upper_index).get();
auto fn = std::make_unique<ops::MatrixBandPartLayer>();
@@ -1339,9 +1125,9 @@ void KernelGenerator::visit(const ir::operation::BatchMatMul &node)
const auto lhs_index{node.getInputs().at(ir::operation::BatchMatMul::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::BatchMatMul::RHS)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
const auto adj_x = node.param().adj_x;
const auto adj_y = node.param().adj_y;
@@ -1358,9 +1144,9 @@ void KernelGenerator::visit(const ir::operation::BroadcastTo &node)
const auto input_index{node.getInputs().at(ir::operation::BroadcastTo::INPUT)};
const auto shape_index{node.getInputs().at(ir::operation::BroadcastTo::SHAPE)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto shape_tensor = _tensor_builder->portableAt(shape_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto shape_tensor = _tensor_reg->getPortableTensor(shape_index).get();
auto fn = std::make_unique<ops::BroadcastToLayer>();
@@ -1373,10 +1159,10 @@ void KernelGenerator::visit(const ir::operation::FusedBatchNorm &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- auto output_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
std::vector<const IPortableTensor *> input_tensors;
for (auto &ifm_idx : node.getInputs())
- input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get());
+ input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
const auto epsilon = node.param().epsilon;
const auto is_training = node.param().is_training;
@@ -1397,8 +1183,8 @@ void KernelGenerator::visit(const ir::operation::LogSoftmax &node)
const auto beta = node.param().beta;
const auto axis = node.param().axis;
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
auto fn = std::make_unique<ops::LogSoftMaxLayer>();
@@ -1414,10 +1200,10 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
const auto block_shape_index{node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE)};
const auto padding_index{node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto block_shape_tensor = _tensor_builder->portableAt(block_shape_index).get();
- auto padding_tensor = _tensor_builder->portableAt(padding_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto block_shape_tensor = _tensor_reg->getPortableTensor(block_shape_index).get();
+ auto padding_tensor = _tensor_reg->getPortableTensor(padding_index).get();
auto fn = std::make_unique<ops::SpaceToBatchNDLayer>();
@@ -1426,29 +1212,14 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Quantize &node)
-{
- const auto input_index{node.getInputs().at(ir::operation::Quantize::Input::INPUT)};
- const auto output_index{node.getOutputs().at(0)};
-
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
-
- auto fn = std::make_unique<ops::QuantizeLayer>();
-
- fn->configure(input_tensor, output_tensor);
-
- _return_fn = std::move(fn);
-}
-
void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
{
const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
const auto output_index{node.getOutputs().at(0)};
auto block_size = node.param().block_size;
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
auto fn = std::make_unique<ops::SpaceToDepthLayer>();
@@ -1462,9 +1233,9 @@ void KernelGenerator::visit(const ir::operation::StatelessRandomUniform &node)
const auto shape_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SHAPE)};
const auto seed_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SEED)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto shape_alloc = _tensor_builder->portableAt(shape_index).get();
- auto seed_alloc = _tensor_builder->portableAt(seed_index).get();
+ auto output_alloc = _tensor_reg->getPortableTensor(output_index).get();
+ auto shape_alloc = _tensor_reg->getPortableTensor(shape_index).get();
+ auto seed_alloc = _tensor_reg->getPortableTensor(seed_index).get();
auto fn = std::make_unique<ops::StatelessRandomUniformLayer>();
@@ -1481,13 +1252,13 @@ void KernelGenerator::visit(const ir::operation::SplitV &node)
const auto size_splits{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)};
const auto split_dim{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)};
- auto in_tensor = _tensor_builder->portableAt(input_idx).get();
- auto in_size_splits = _tensor_builder->portableAt(size_splits).get();
- auto in_split_dim = _tensor_builder->portableAt(split_dim).get();
+ auto in_tensor = _tensor_reg->getPortableTensor(input_idx).get();
+ auto in_size_splits = _tensor_reg->getPortableTensor(size_splits).get();
+ auto in_split_dim = _tensor_reg->getPortableTensor(split_dim).get();
std::vector<IPortableTensor *> out_tensors;
for (auto &output_idx : node.getOutputs())
- out_tensors.emplace_back(_tensor_builder->portableAt(output_idx).get());
+ out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get());
auto fn = std::make_unique<ops::SplitVLayer>();
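The Pool2D visitor above replaces the separate MaxPool2D/AvgPool2D paths and resolves explicit padding through ir::calculatePadding before calling PoolLayer::configure. As a standalone, hypothetical illustration of the SAME-padding arithmetic such a helper conventionally performs (the helper name and exact convention below are assumptions for illustration, not onert's actual implementation):

#include <algorithm>
#include <cassert>
#include <cstdint>

// Hypothetical sketch of SAME padding for one spatial dimension, following the
// usual NNAPI/TFLite convention that padding calculation of this kind mirrors.
struct Pad1D { uint32_t front; uint32_t back; };

Pad1D samePadding1D(uint32_t in, uint32_t kernel, uint32_t stride)
{
  const uint32_t out = (in + stride - 1) / stride;                   // ceil(in / stride)
  const int64_t needed =
      std::max<int64_t>(0, int64_t(out - 1) * stride + kernel - in); // total padding needed
  return {uint32_t(needed / 2), uint32_t(needed - needed / 2)};      // split, extra goes to back
}

int main()
{
  // 7-wide input, kernel 3, stride 2 -> 4-wide output with 1 pixel of padding on each side.
  const auto p = samePadding1D(7, 3, 2);
  assert(p.front == 1 && p.back == 1);
  return 0;
}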
diff --git a/runtime/onert/backend/cpu/KernelGenerator.h b/runtime/onert/backend/cpu/KernelGenerator.h
index 40c056a96..786e68ee0 100644
--- a/runtime/onert/backend/cpu/KernelGenerator.h
+++ b/runtime/onert/backend/cpu/KernelGenerator.h
@@ -19,6 +19,7 @@
#include "ExternalContext.h"
#include "TensorBuilder.h"
+#include "backend/cpu_common/TensorRegistry.h"
#include "Tensor.h"
#include <backend/CustomKernelBuilder.h>
@@ -38,6 +39,7 @@ class KernelGenerator : public IKernelGenerator
public:
KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
const std::shared_ptr<custom::IKernelBuilder> &kernel_builder,
const std::shared_ptr<ExternalContext> &external_context);
@@ -46,8 +48,6 @@ public:
void visit(const ir::OpSequence &) override;
void visit(const ir::operation::Conv2D &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::MaxPool2D &) override;
- void visit(const ir::operation::AvgPool2D &) override;
void visit(const ir::operation::Concat &) override;
void visit(const ir::operation::Fill &) override;
void visit(const ir::operation::FullyConnected &) override;
@@ -55,51 +55,35 @@ public:
void visit(const ir::operation::Squeeze &) override;
void visit(const ir::operation::Softmax &) override;
void visit(const ir::operation::Comparison &) override;
- void visit(const ir::operation::Add &) override;
- void visit(const ir::operation::Sub &) override;
- void visit(const ir::operation::Mul &) override;
- void visit(const ir::operation::Div &) override;
+ void visit(const ir::operation::BinaryArithmetic &) override;
void visit(const ir::operation::Einsum &) override;
void visit(const ir::operation::Gather &) override;
void visit(const ir::operation::Custom &node) override;
- void visit(const ir::operation::Exp &) override;
+ void visit(const ir::operation::ElementwiseActivation &) override;
+ void visit(const ir::operation::ElementwiseBinary &) override;
+ void visit(const ir::operation::ElementwiseUnary &) override;
void visit(const ir::operation::ExpandDims &) override;
- void visit(const ir::operation::Logistic &) override;
void visit(const ir::operation::Pad &) override;
- void visit(const ir::operation::Max &) override;
- void visit(const ir::operation::Min &) override;
- void visit(const ir::operation::Tanh &) override;
void visit(const ir::operation::Pack &) override;
void visit(const ir::operation::Unpack &) override;
void visit(const ir::operation::OneHot &) override;
- void visit(const ir::operation::Cast &) override;
void visit(const ir::operation::Transpose &) override;
void visit(const ir::operation::Reduce &) override;
- void visit(const ir::operation::ReLU &) override;
- void visit(const ir::operation::ReLU6 &) override;
void visit(const ir::operation::Select &) override;
void visit(const ir::operation::Slice &) override;
void visit(const ir::operation::StridedSlice &) override;
void visit(const ir::operation::Split &) override;
- void visit(const ir::operation::Abs &) override;
- void visit(const ir::operation::Cos &) override;
- void visit(const ir::operation::Sin &) override;
- void visit(const ir::operation::RSQRT &) override;
void visit(const ir::operation::Shape &) override;
void visit(const ir::operation::ResizeBilinear &node) override;
void visit(const ir::operation::Reverse &) override;
- void visit(const ir::operation::Neg &) override;
void visit(const ir::operation::ArgMax &) override;
- void visit(const ir::operation::Log &) override;
- void visit(const ir::operation::Round &) override;
+ void visit(const ir::operation::Pool2D &) override;
void visit(const ir::operation::Pow &) override;
- void visit(const ir::operation::LogicalNot &) override;
- void visit(const ir::operation::ZerosLike &) override;
void visit(const ir::operation::SquaredDifference &) override;
void visit(const ir::operation::Tile &) override;
- void visit(const ir::operation::LogicalOr &) override;
void visit(const ir::operation::L2Normalization &) override;
void visit(const ir::operation::Range &) override;
+ void visit(const ir::operation::Rank &) override;
void visit(const ir::operation::MatrixBandPart &) override;
void visit(const ir::operation::BatchMatMul &) override;
void visit(const ir::operation::BatchToSpaceND &) override;
@@ -107,7 +91,6 @@ public:
void visit(const ir::operation::FusedBatchNorm &) override;
void visit(const ir::operation::LogSoftmax &) override;
void visit(const ir::operation::SpaceToBatchND &) override;
- void visit(const ir::operation::Quantize &) override;
void visit(const ir::operation::SpaceToDepth &) override;
void visit(const ir::operation::StatelessRandomUniform &) override;
void visit(const ir::operation::SplitV &) override;
@@ -116,6 +99,7 @@ private:
const ir::Operands &_ctx;
const ir::Operations &_operations_ctx;
std::shared_ptr<TensorBuilder> _tensor_builder;
+ std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
ir::Layout _current_op_seq_layout;
const std::shared_ptr<ExternalContext> _external_context;
diff --git a/runtime/onert/backend/cpu/TensorBuilder.cc b/runtime/onert/backend/cpu/TensorBuilder.cc
index ab8ba5756..828d52f7c 100644
--- a/runtime/onert/backend/cpu/TensorBuilder.cc
+++ b/runtime/onert/backend/cpu/TensorBuilder.cc
@@ -27,8 +27,8 @@ namespace backend
namespace cpu
{
-TensorBuilder::TensorBuilder()
- : _tensor_reg{new cpu_common::TensorRegistry()},
+TensorBuilder::TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg)
+ : _tensor_reg{tensor_reg},
_dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)},
_static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())}
{
@@ -57,7 +57,7 @@ void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
assert(_tensor_info_map.find(ind) != _tensor_info_map.end());
const auto tensor_info = _tensor_info_map.at(ind);
- if (!at(ind)->is_dynamic())
+ if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
{
const auto size = tensor_info.total_size();
_static_tensor_mgr->claimPlan(ind, size);
@@ -66,7 +66,7 @@ void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
{
- if (!at(ind)->is_dynamic())
+ if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
{
_static_tensor_mgr->releasePlan(ind);
}
@@ -85,29 +85,6 @@ void TensorBuilder::allocate()
// This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation.
}
-std::shared_ptr<ITensor> TensorBuilder::tensorAt(const ir::OperandIndex &ind)
-{
- return _tensor_reg->getITensor(ind);
-}
-
-std::shared_ptr<IPortableTensor> TensorBuilder::portableAt(const ir::OperandIndex &ind)
-{
- return _tensor_reg->getPortableTensor(ind);
-}
-
-bool TensorBuilder::setMigrantTensor(const ir::OperandIndex &ind,
- const std::shared_ptr<IPortableTensor> &tensor)
-{
- return _tensor_reg->setMigrantTensor(ind, tensor);
-}
-
-void TensorBuilder::iterate(const IterateFunction &fn) { _static_tensor_mgr->iterate(fn); }
-
-std::shared_ptr<Tensor> TensorBuilder::at(const ir::OperandIndex &ind)
-{
- return _tensor_reg->getNativeTensor(ind);
-}
-
std::unique_ptr<ITensorManager> TensorBuilder::releaseStaticTensorManager(void)
{
return std::move(_static_tensor_mgr);
diff --git a/runtime/onert/backend/cpu/TensorBuilder.h b/runtime/onert/backend/cpu/TensorBuilder.h
index 617136514..b6d5f09cc 100644
--- a/runtime/onert/backend/cpu/TensorBuilder.h
+++ b/runtime/onert/backend/cpu/TensorBuilder.h
@@ -38,9 +38,7 @@ namespace cpu
class TensorBuilder : public ITensorBuilder
{
public:
- TensorBuilder();
-
- bool supportDynamicTensor() override { return true; }
+ TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg);
/**
* @brief Register tensor information to allocate on CPU backend
@@ -60,34 +58,12 @@ public:
void allocate() override;
void postFunctionPrepare() override { /* DO NOTHING */}
- /**
- * @brief Get tensor with a specific OperandIndex
- *
- * @return shared_ptr<ITensor> if a tensor with given OperandIndex exists. nullptr otherwise.
- */
- std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) override;
-
- void iterate(const IterateFunction &fn) override;
-
std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) override;
IDynamicTensorManager *dynamicTensorManager(void) override { return _dynamic_tensor_mgr.get(); }
std::unique_ptr<ITensorManager> releaseDynamicTensorManager(void) override;
- /**
- * @brief Get tensor with a specific OperandIndex.
- * @param ind OperandIndex for the tensor. There must exist a tensor with this ind.
- * If not, program will crash with assert or exception.
- * @return shared_ptr<Tensor>
- */
- std::shared_ptr<Tensor> at(const ir::OperandIndex &ind);
- std::shared_ptr<IPortableTensor> portableAt(const ir::OperandIndex &ind);
- bool setMigrantTensor(const ir::OperandIndex &ind,
- const std::shared_ptr<IPortableTensor> &tensor) override;
-
- std::shared_ptr<ITensorRegistry> tensorRegistry() override { return _tensor_reg; }
-
private:
const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr;
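Taken together, the TensorBuilder and KernelGenerator changes mean the cpu backend now constructs one shared cpu_common::TensorRegistry and hands it to both objects, with tensor lookups going through getPortableTensor/getNativeTensor instead of the builder's removed at/portableAt accessors. A schematic fragment of the implied wiring (not compilable on its own; the operand/operation contexts, kernel_builder, and external_context are placeholders, and the real construction lives in the backend's Backend/BackendContext code):

// Sketch only: shared registry wiring implied by the new constructors.
auto tensor_reg = std::make_shared<cpu_common::TensorRegistry>();
auto tensor_builder = std::make_shared<TensorBuilder>(tensor_reg);
auto kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tensor_builder,
                                                    tensor_reg, kernel_builder, external_context);
// Visitors then resolve tensors through the registry rather than the builder:
auto output_tensor = tensor_reg->getPortableTensor(output_index).get();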
diff --git a/runtime/onert/backend/cpu/ops/AbsLayer.cc b/runtime/onert/backend/cpu/ops/AbsLayer.cc
deleted file mode 100644
index 322785aeb..000000000
--- a/runtime/onert/backend/cpu/ops/AbsLayer.cc
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "AbsLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Elementwise.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-AbsLayer::AbsLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void AbsLayer::absFloat32()
-{
- nnfw::cker::Abs(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void AbsLayer::absQuant8() { throw std::runtime_error{"NYI"}; }
-
-void AbsLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void AbsLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- absFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- absQuant8();
- }
- else
- {
- throw std::runtime_error{"Abs: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/AbsLayer.h b/runtime/onert/backend/cpu/ops/AbsLayer.h
deleted file mode 100644
index feb5f35ae..000000000
--- a/runtime/onert/backend/cpu/ops/AbsLayer.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_ABSLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_ABSLAYER_H__
-
-#include "backend/IPortableTensor.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class AbsLayer : public ::onert::exec::IFunction
-{
-public:
- AbsLayer();
-
-public:
- void absFloat32();
-
- void absQuant8();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_ABSLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/AddLayer.cc b/runtime/onert/backend/cpu/ops/AddLayer.cc
deleted file mode 100644
index 379215303..000000000
--- a/runtime/onert/backend/cpu/ops/AddLayer.cc
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "AddLayer.h"
-
-#include <cker/operation/BinaryArithmeticOps.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-void AddLayer::addFloat32()
-{
- float output_activation_min = 0, output_activation_max = 0;
- CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.float_activation_max = output_activation_max;
- op_params.float_activation_min = output_activation_min;
-
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
- if (need_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void AddLayer::addInt32()
-{
- int32_t output_activation_min = 0, output_activation_max = 0;
- CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.quantized_activation_max = output_activation_max;
- op_params.quantized_activation_min = output_activation_min;
-
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
- if (need_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer()));
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer()));
-}
-
-void AddLayer::addQuant8()
-{
- int32_t output_activation_min, output_activation_max;
- CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
- &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.quantized_activation_max = output_activation_max;
- op_params.quantized_activation_min = output_activation_min;
- // Parameters for scaled quantized computation
- op_params.left_shift = 20;
- // Zero-points of input and output tensors
- op_params.input1_offset = -_lhs->data_offset();
- op_params.input2_offset = -_rhs->data_offset();
- op_params.output_offset = _output->data_offset();
- assert((op_params.input1_offset >= 0) && (op_params.input1_offset <= 255));
- assert((op_params.input2_offset >= 0) && (op_params.input2_offset <= 255));
- assert((op_params.output_offset >= 0) && (op_params.output_offset <= 255));
-
- // Compute normalized scale for _lhs and _rhs values,
- // and represent in 32-bit fixed point
- const double norm_max_scale = 2 * std::max(_lhs->data_scale(), _rhs->data_scale());
- const double real_lhs_scale = _lhs->data_scale() / norm_max_scale;
- const double real_rhs_scale = _rhs->data_scale() / norm_max_scale;
- // output scale is used to normalize final result, so we invert the scale here
- const double real_output_scale =
- norm_max_scale / (_output->data_scale() * (1 << op_params.left_shift));
-
- // Represent the scales as fixed int32_t multipliers, and int32_t shifts
- QuantizeMultiplier(real_lhs_scale, &op_params.input1_multiplier, &op_params.input1_shift);
- QuantizeMultiplier(real_rhs_scale, &op_params.input2_multiplier, &op_params.input2_shift);
- QuantizeMultiplier(real_output_scale, &op_params.output_multiplier, &op_params.output_shift);
-
- // cker quant8 add is not implemented yet
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
- if (need_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
-}
-
-void AddLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- const ir::Activation activation, IPortableTensor *output)
-{
- assert(lhs != nullptr);
- assert(rhs != nullptr);
- assert(output != nullptr);
-
- _lhs = lhs;
- _rhs = rhs;
- _activation = activation;
- _output = output;
-}
-
-void AddLayer::run()
-{
- if (_lhs->data_type() == OperandType::FLOAT32)
- {
- addFloat32();
- }
- else if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- addQuant8();
- }
- else if (_output->data_type() == OperandType::INT32)
- {
- addInt32();
- }
- else
- {
- throw std::runtime_error{"Add: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/AddLayer.h b/runtime/onert/backend/cpu/ops/AddLayer.h
deleted file mode 100644
index 91030d93a..000000000
--- a/runtime/onert/backend/cpu/ops/AddLayer.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_ADDLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_ADDLAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include "OperationUtils.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class AddLayer : public ::onert::exec::IFunction
-{
-public:
- AddLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
- {
- // DO NOTHING
- }
-
-public:
- void addFloat32();
-
- void addQuant8();
-
- void addInt32();
-
- void configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- const ir::Activation activation, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_lhs;
- const IPortableTensor *_rhs;
- IPortableTensor *_output;
-
- ir::Activation _activation{ir::Activation::NONE};
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_ADDLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/AvgPoolLayer.cc b/runtime/onert/backend/cpu/ops/AvgPoolLayer.cc
deleted file mode 100644
index 9c22c1c86..000000000
--- a/runtime/onert/backend/cpu/ops/AvgPoolLayer.cc
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "AvgPoolLayer.h"
-
-#include <cker/operation/AveragePool.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-#define AVGPOOLING_PARAMETERS \
- nnfw::cker::PoolParams op_params; \
- op_params.stride_height = _strideHeight; \
- op_params.stride_width = _strideWidth; \
- op_params.filter_height = _kernelHeight; \
- op_params.filter_width = _kernelWidth; \
- op_params.padding_values.height = (int8_t)_paddingTop; \
- op_params.padding_values.width = (int8_t)_paddingLeft;
-
-AvgPoolLayer::AvgPoolLayer()
- : _input(nullptr), _output(nullptr), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
- _paddingBottom(0), _strideWidth(0), _strideHeight(0), _kernelWidth(0), _kernelHeight(0),
- _activation(ir::Activation::NONE)
-{
- // DO NOTHING
-}
-
-void AvgPoolLayer::averagePoolFloat32()
-{
- AVGPOOLING_PARAMETERS
- float output_activation_min = 0, output_activation_max = 0;
- CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
- op_params.float_activation_min = output_activation_min;
- op_params.float_activation_max = output_activation_max;
-
- nnfw::cker::AveragePool(op_params, getTensorShape(_input),
- reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-void AvgPoolLayer::averagePoolQuant8()
-{
- AVGPOOLING_PARAMETERS
- int32_t output_activation_min = 0;
- int32_t output_activation_max = 0;
- CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
- &output_activation_max);
- op_params.quantized_activation_min = output_activation_min;
- op_params.quantized_activation_max = output_activation_max;
-
- nnfw::cker::AveragePool(op_params, getTensorShape(_input),
- reinterpret_cast<const uint8_t *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
-}
-
-void AvgPoolLayer::configure(const IPortableTensor *input, const uint32_t paddingLeft,
- const uint32_t paddingRight, const uint32_t paddingTop,
- const uint32_t paddingBottom, const uint32_t strideWidth,
- const uint32_t strideHeight, const uint32_t kernelWidth,
- const uint32_t kernelHeight, const ir::Activation activation,
- IPortableTensor *output)
-{
- assert(input != nullptr);
- assert(output != nullptr);
-
- _input = input;
- _paddingLeft = paddingLeft;
- _paddingRight = paddingRight;
- _paddingTop = paddingTop;
- _paddingBottom = paddingBottom;
- _strideWidth = strideWidth;
- _strideHeight = strideHeight;
- _kernelWidth = kernelWidth;
- _kernelHeight = kernelHeight;
- _activation = activation;
- _output = output;
-}
-
-void AvgPoolLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- averagePoolFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- averagePoolQuant8();
- }
- else
- {
- throw std::runtime_error{"AvgPool: unsupported data type"};
- }
-}
-
-#undef AVGPOOLING_PARAMETERS
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/AvgPoolLayer.h b/runtime/onert/backend/cpu/ops/AvgPoolLayer.h
deleted file mode 100644
index d4e8f79e7..000000000
--- a/runtime/onert/backend/cpu/ops/AvgPoolLayer.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_AVGPOOLLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_AVGPOOLLAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include "OperationUtils.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class AvgPoolLayer : public ::onert::exec::IFunction
-{
-public:
- AvgPoolLayer();
-
-public:
- void averagePoolFloat32();
-
- void averagePoolQuant8();
-
- void configure(const IPortableTensor *input, const uint32_t paddingLeft,
- const uint32_t paddingRight, const uint32_t paddingTop,
- const uint32_t paddingBottom, const uint32_t strideWidth,
- const uint32_t strideHeight, const uint32_t kernelWidth,
- const uint32_t kernelHeight, const ir::Activation activation,
- IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-
- uint32_t _paddingLeft;
- uint32_t _paddingTop;
- uint32_t _paddingRight;
- uint32_t _paddingBottom;
-
- uint32_t _strideWidth;
- uint32_t _strideHeight;
- uint32_t _kernelWidth;
- uint32_t _kernelHeight;
-
- ir::Activation _activation;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_AVGPOOLLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc b/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc
new file mode 100644
index 000000000..f50c63375
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BinaryArithmeticLayer.h"
+
+#include <cker/operation/BinaryArithmeticOps.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+namespace
+{
+
+template <nnfw::cker::BinaryArithmeticOpType arithmetic_type, typename T>
+void eval(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output,
+ nnfw::cker::BinaryArithmeticOpParam op_params)
+{
+ const bool need_broadcast =
+ nnfw::cker::ProcessBroadcastShapes(getTensorShape(lhs), getTensorShape(rhs), &op_params);
+ if (need_broadcast)
+ {
+ nnfw::cker::BroadcastBinaryArithmeticOp<arithmetic_type>(
+ op_params, getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
+ reinterpret_cast<T *>(output->buffer()));
+ return;
+ }
+
+ nnfw::cker::BinaryArithmeticOp<arithmetic_type>(
+ op_params, getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
+ reinterpret_cast<T *>(output->buffer()));
+}
+
+template <nnfw::cker::BinaryArithmeticOpType arithmetic_type>
+std::function<void(const IPortableTensor *, const IPortableTensor *, IPortableTensor *)>
+generateKernelGeneric(const IPortableTensor *lhs, const ir::Activation activation,
+ nnfw::cker::BinaryArithmeticOpParam op_params)
+{
+ switch (lhs->data_type())
+ {
+ case OperandType::FLOAT32:
+ {
+ float output_activation_min = 0, output_activation_max = 0;
+ CalculateActivationRange(activation, &output_activation_min, &output_activation_max);
+ op_params.float_activation_max = output_activation_max;
+ op_params.float_activation_min = output_activation_min;
+ return std::bind(&eval<arithmetic_type, float>, std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3, op_params);
+ break;
+ }
+ case OperandType::INT32:
+ {
+ int32_t output_activation_min = 0, output_activation_max = 0;
+ CalculateActivationRange(activation, &output_activation_min, &output_activation_max);
+ op_params.quantized_activation_max = output_activation_max;
+ op_params.quantized_activation_min = output_activation_min;
+ return std::bind(eval<arithmetic_type, int32_t>, std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3, op_params);
+ break;
+ }
+ default:
+ throw std::runtime_error{"BinaryArithmetic(generic): Unsupported data type"};
+ }
+}
+
+void setAddOrSubQuant8Params(const IPortableTensor *lhs, const IPortableTensor *rhs,
+ IPortableTensor *output, ir::Activation activation,
+ nnfw::cker::BinaryArithmeticOpParam *params)
+{
+ int32_t output_activation_min, output_activation_max;
+ CalculateActivationRangeUint8(activation, output, &output_activation_min, &output_activation_max);
+ nnfw::cker::BinaryArithmeticOpParam &op_params = *params;
+ op_params.quantized_activation_max = output_activation_max;
+ op_params.quantized_activation_min = output_activation_min;
+ // Parameters for scaled quantized computation
+ op_params.left_shift = 20;
+ // Zero-points of input and output tensors
+ op_params.input1_offset = -lhs->data_offset();
+ op_params.input2_offset = -rhs->data_offset();
+ op_params.output_offset = output->data_offset();
+ assert((op_params.input1_offset >= 0) && (op_params.input1_offset <= 255));
+ assert((op_params.input2_offset >= 0) && (op_params.input2_offset <= 255));
+ assert((op_params.output_offset >= 0) && (op_params.output_offset <= 255));
+
+ // Compute normalized scale for _lhs and _rhs values,
+ // and represent in 32-bit fixed point
+ const double norm_max_scale = 2 * std::max(lhs->data_scale(), rhs->data_scale());
+ const double real_lhs_scale = lhs->data_scale() / norm_max_scale;
+ const double real_rhs_scale = rhs->data_scale() / norm_max_scale;
+ // output scale is used to normalize final result, so we invert the scale here
+ const double real_output_scale =
+ norm_max_scale / (output->data_scale() * (1 << op_params.left_shift));
+
+ // Represent the scales as fixed int32_t multipliers, and int32_t shifts
+ QuantizeMultiplier(real_lhs_scale, &op_params.input1_multiplier, &op_params.input1_shift);
+ QuantizeMultiplier(real_rhs_scale, &op_params.input2_multiplier, &op_params.input2_shift);
+ QuantizeMultiplier(real_output_scale, &op_params.output_multiplier, &op_params.output_shift);
+}
+
+void setMulQuant8Params(const IPortableTensor *lhs, const IPortableTensor *rhs,
+ IPortableTensor *output, ir::Activation activation,
+ nnfw::cker::BinaryArithmeticOpParam *params)
+{
+ int32_t output_activation_min, output_activation_max;
+ CalculateActivationRangeUint8(activation, output, &output_activation_min, &output_activation_max);
+ nnfw::cker::BinaryArithmeticOpParam &op_params = *params;
+
+ op_params.quantized_activation_max = output_activation_max;
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.input1_offset = -lhs->data_offset();
+ op_params.input2_offset = -rhs->data_offset();
+ op_params.output_offset = output->data_offset();
+
+ double real_multiplier = lhs->data_scale() * rhs->data_scale() / output->data_scale();
+ QuantizeMultiplier(real_multiplier, &op_params.output_multiplier, &op_params.output_shift);
+}
+
+} // namespace
+
+void BinaryArithmeticLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
+ IPortableTensor *output, const ir::Activation activation,
+ const ArithmeticType arithmetic_type)
+{
+ assert(lhs != nullptr);
+ assert(rhs != nullptr);
+ assert(output != nullptr);
+
+ _lhs = lhs;
+ _rhs = rhs;
+ _output = output;
+
+ nnfw::cker::BinaryArithmeticOpParam op_params;
+ switch (arithmetic_type)
+ {
+ case ArithmeticType::kAdd:
+ if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ setAddOrSubQuant8Params(_lhs, _rhs, _output, activation, &op_params);
+ _kernel = std::bind(&eval<nnfw::cker::BinaryArithmeticOpType::ADD, uint8_t>,
+ std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+ op_params);
+ }
+ else
+ {
+ _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::ADD>(_lhs, activation,
+ op_params);
+ }
+ break;
+ case ArithmeticType::kSub:
+ if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ setAddOrSubQuant8Params(_lhs, _rhs, _output, activation, &op_params);
+ op_params.input2_multiplier *= -1;
+ _kernel = std::bind(&eval<nnfw::cker::BinaryArithmeticOpType::SUB, uint8_t>,
+ std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+ op_params);
+ }
+ else
+ {
+ _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::SUB>(_lhs, activation,
+ op_params);
+ }
+ break;
+ case ArithmeticType::kMul:
+ if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ nnfw::cker::BinaryArithmeticOpParam op_params;
+ setMulQuant8Params(_lhs, _rhs, _output, activation, &op_params);
+ _kernel = std::bind(&eval<nnfw::cker::BinaryArithmeticOpType::MUL, uint8_t>,
+ std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+ op_params);
+ }
+ else
+ {
+ _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::MUL>(_lhs, activation,
+ op_params);
+ }
+ break;
+ case ArithmeticType::kDiv:
+ if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ throw std::runtime_error{
+ "BinaryArithmetic(Div): Div operation does not support quantization"};
+ }
+ else if (_lhs->data_type() == OperandType::INT32)
+ {
+ throw std::runtime_error{"BinaryArithmetic(Div): Unsupported data type"};
+ }
+ else
+ {
+ _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::DIV>(_lhs, activation,
+ op_params);
+ }
+ break;
+ default:
+ throw std::runtime_error{"BinaryArithmetic: Unsupported BinaryArithmetic type"};
+ }
+}
+
+void BinaryArithmeticLayer::run() { _kernel(_lhs, _rhs, _output); }
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
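setAddOrSubQuant8Params above normalizes both input scales against 2 * max(lhs_scale, rhs_scale), widens the accumulator by left_shift = 20 bits, and folds the inverse of that widening into the output rescale. A small self-contained check of that identity in double precision (the scale/zero-point values are made up for illustration; the real kernel applies the fixed-point multipliers produced by QuantizeMultiplier rather than doubles):

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>

int main()
{
  // Example quantization parameters (assumed values, not taken from a real model).
  const double lhs_scale = 0.5, rhs_scale = 0.25, out_scale = 0.75;
  const int32_t lhs_zero = 10, rhs_zero = 20, out_zero = 5;
  const int left_shift = 20;

  // Same normalization as the layer: both inputs are brought to a common scale.
  const double norm_max_scale = 2 * std::max(lhs_scale, rhs_scale);
  const double real_lhs_scale = lhs_scale / norm_max_scale;
  const double real_rhs_scale = rhs_scale / norm_max_scale;
  const double real_out_scale = norm_max_scale / (out_scale * (1 << left_shift));

  // One quantized element per input.
  const int32_t x1 = 140, x2 = 60;

  // Emulated kernel path: remove zero-point, widen by left_shift, rescale, add, rescale to output.
  const double acc = ((x1 - lhs_zero) * (1 << left_shift) * real_lhs_scale) +
                     ((x2 - rhs_zero) * (1 << left_shift) * real_rhs_scale);
  const double emulated = acc * real_out_scale + out_zero;

  // Float reference: dequantize, add, requantize.
  const double reference =
      ((x1 - lhs_zero) * lhs_scale + (x2 - rhs_zero) * rhs_scale) / out_scale + out_zero;

  assert(std::fabs(emulated - reference) < 1e-9);
  return 0;
}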
diff --git a/runtime/onert/backend/cpu/ops/DivLayer.h b/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.h
index 9411be76e..d6b33ad07 100644
--- a/runtime/onert/backend/cpu/ops/DivLayer.h
+++ b/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CPU_OPS_DIVLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_DIVLAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_BINARYARITHMETICLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_BINARYARITHMETICLAYER_H__
#include <backend/IPortableTensor.h>
#include "OperationUtils.h"
@@ -31,21 +31,25 @@ namespace cpu
namespace ops
{
-class DivLayer : public ::onert::exec::IFunction
+enum class ArithmeticType
+{
+ kAdd,
+ kSub,
+ kMul,
+ kDiv,
+};
+
+class BinaryArithmeticLayer : public ::onert::exec::IFunction
{
public:
- DivLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
+ BinaryArithmeticLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
{
// DO NOTHING
}
public:
- void divFloat32();
-
- void divQuant8();
-
- void configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- const ir::Activation activation, IPortableTensor *output);
+ void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output,
+ const ir::Activation activation, const ArithmeticType arithmetic_type);
void run() override;
@@ -54,7 +58,7 @@ private:
const IPortableTensor *_rhs;
IPortableTensor *_output;
- ir::Activation _activation{ir::Activation::NONE};
+ std::function<void(const IPortableTensor *, const IPortableTensor *, IPortableTensor *)> _kernel;
};
} // namespace ops
@@ -62,4 +66,4 @@ private:
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CPU_OPS_DIVLAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_BINARYARITHMETICLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/CastLayer.cc b/runtime/onert/backend/cpu/ops/CastLayer.cc
deleted file mode 100644
index 497515606..000000000
--- a/runtime/onert/backend/cpu/ops/CastLayer.cc
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "CastLayer.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-CastLayer::CastLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void CastLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-template <typename FromT, typename ToT> void CastLayer::castTensor(const FromT *in, ToT *out)
-{
- auto input_shape = getTensorShape(_input);
- auto output_shape = getTensorShape(_output);
- const auto num_elements = MatchingFlatSize(input_shape, output_shape);
-
- std::transform(in, in + num_elements, out, [](FromT a) { return static_cast<ToT>(a); });
-}
-
-template <typename FromT> void CastLayer::castPtr(const FromT *in, DataPtr out)
-{
- switch (_output->data_type())
- {
- case ir::DataType::FLOAT32:
- castTensor(in, out.f);
- return;
- case ir::DataType::INT32:
- castTensor(in, out.i32);
- return;
- case ir::DataType::UINT32:
- castTensor(in, out.u32);
- return;
- case ir::DataType::UINT8:
- castTensor(in, out.u8);
- return;
- case ir::DataType::BOOL8:
- castTensor(in, out.b);
- return;
- case ir::DataType::INT64:
- castTensor(in, out.i64);
- return;
- default:
- throw std::runtime_error("Not supported output type" +
- std::to_string((int)_output->data_type()));
- }
-}
-
-void CastLayer::run()
-{
- auto input_buf = _input->buffer();
- auto output_buf = _output->buffer();
- const auto in = *reinterpret_cast<const DataPtr *>(&input_buf);
- auto out = *reinterpret_cast<DataPtr *>(&output_buf);
-
- switch (_input->data_type())
- {
- case ir::DataType::FLOAT32:
- castPtr(in.f, out);
- return;
- case ir::DataType::INT32:
- castPtr(in.i32, out);
- return;
- case ir::DataType::UINT32:
- castPtr(in.u32, out);
- return;
- case ir::DataType::UINT8:
- castPtr(in.u8, out);
- return;
- case ir::DataType::BOOL8:
- castPtr(in.b, out);
- return;
- case ir::DataType::INT64:
- castPtr(in.i64, out);
- return;
- default:
- throw std::runtime_error("Cast: unsupported data type" +
- std::to_string((int)_input->data_type()));
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/CastLayer.h b/runtime/onert/backend/cpu/ops/CastLayer.h
deleted file mode 100644
index 290c722e2..000000000
--- a/runtime/onert/backend/cpu/ops/CastLayer.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_CASTLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_CASTLAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include "OperationUtils.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class CastLayer : public ::onert::exec::IFunction
-{
-public:
- CastLayer();
-
-public:
- template <typename FromT, typename ToT> void castTensor(const FromT *in, ToT *out);
- template <typename FromT> void castPtr(const FromT *in, DataPtr out);
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_CASTLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
index 2d5bbef1e..c057267d3 100644
--- a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
@@ -31,7 +31,8 @@ namespace ops
ConvolutionLayer::ConvolutionLayer()
: _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
_paddingType(ir::PaddingType::EXPLICIT), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
- _paddingBottom(0), _strideWidth(0), _strideHeight(0), _activation(ir::Activation::NONE),
+ _paddingBottom(0), _strideWidth(0), _strideHeight(0), _dilationWidthFactor(1),
+ _dilationHeightFactor(1), _activation(ir::Activation::NONE),
_conv_kernel(new nnfw::cker::Conv()), _prepare(false)
{
// DO NOTHING
@@ -50,8 +51,8 @@ void ConvolutionLayer::convFloat32()
op_params.padding_values.height = _paddingTop;
op_params.stride_width = _strideWidth;
op_params.stride_height = _strideHeight;
- op_params.dilation_width_factor = 1;
- op_params.dilation_height_factor = 1;
+ op_params.dilation_width_factor = _dilationWidthFactor;
+ op_params.dilation_height_factor = _dilationHeightFactor;
op_params.float_activation_min = output_activation_min;
op_params.float_activation_max = output_activation_max;
@@ -78,8 +79,8 @@ void ConvolutionLayer::convQuant8()
nnfw::cker::ConvParams op_params;
op_params.stride_width = _strideWidth;
op_params.stride_height = _strideHeight;
- op_params.dilation_width_factor = 1;
- op_params.dilation_height_factor = 1;
+ op_params.dilation_width_factor = _dilationWidthFactor;
+ op_params.dilation_height_factor = _dilationHeightFactor;
op_params.padding_type = getPaddingType(_paddingType);
op_params.padding_values.width = _paddingLeft;
op_params.padding_values.height = _paddingTop;
@@ -104,6 +105,8 @@ void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTe
const uint32_t paddingLeft, const uint32_t paddingRight,
const uint32_t paddingTop, const uint32_t paddingBottom,
const uint32_t strideWidth, const uint32_t strideHeight,
+ const uint32_t dilationWidthFactor,
+ const uint32_t dilationHeightFactor,
const ir::Activation activation, IPortableTensor *output)
{
_input = input;
@@ -116,6 +119,8 @@ void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTe
_paddingBottom = paddingBottom;
_strideWidth = strideWidth;
_strideHeight = strideHeight;
+ _dilationWidthFactor = dilationWidthFactor;
+ _dilationHeightFactor = dilationHeightFactor;
_activation = activation;
_output = output;
}
@@ -145,7 +150,8 @@ void ConvolutionLayer::run()
param_padding.param.bottom = _paddingBottom;
const auto padding =
- ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height);
+ ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ _dilationWidthFactor, _dilationHeightFactor);
_paddingLeft = padding.left;
_paddingRight = padding.right;
@@ -176,7 +182,8 @@ void ConvolutionLayer::prepare()
{
bool is_transposed = false;
kernel.prepare(getTensorShape(_kernel), reinterpret_cast<const float *>(_kernel->buffer()),
- getPaddingType(_paddingType), is_transposed);
+ getPaddingType(_paddingType), is_transposed, _dilationWidthFactor,
+ _dilationHeightFactor);
// Decrease reference of _kernel(weights) only when _kernel is constant
if (is_transposed)
diff --git a/runtime/onert/backend/cpu/ops/ConvolutionLayer.h b/runtime/onert/backend/cpu/ops/ConvolutionLayer.h
index 2833387c4..398892e65 100644
--- a/runtime/onert/backend/cpu/ops/ConvolutionLayer.h
+++ b/runtime/onert/backend/cpu/ops/ConvolutionLayer.h
@@ -56,7 +56,8 @@ public:
const IPortableTensor *bias, ir::PaddingType _paddingType,
const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
const uint32_t paddingBottom, const uint32_t strideWidth,
- const uint32_t strideHeight, const ir::Activation activation,
+ const uint32_t strideHeight, const uint32_t dilationWidthFactor,
+ const uint32_t dilationHeightFactor, const ir::Activation activation,
IPortableTensor *output);
void run() override;
@@ -77,6 +78,8 @@ private:
uint32_t _strideWidth;
uint32_t _strideHeight;
+ uint32_t _dilationWidthFactor;
+ uint32_t _dilationHeightFactor;
ir::Activation _activation;
diff --git a/runtime/onert/backend/cpu/ops/CosLayer.cc b/runtime/onert/backend/cpu/ops/CosLayer.cc
deleted file mode 100644
index 9417019d5..000000000
--- a/runtime/onert/backend/cpu/ops/CosLayer.cc
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "CosLayer.h"
-#include "OperationUtils.h"
-
-#include <cker/operation/Elementwise.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-CosLayer::CosLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void CosLayer::cosFloat32()
-{
- nnfw::cker::Cos(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void CosLayer::cosQuant8() { throw std::runtime_error{"NYI"}; }
-
-void CosLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void CosLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- cosFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- cosQuant8();
- }
- else
- {
- throw std::runtime_error{"Cos: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/CosLayer.h b/runtime/onert/backend/cpu/ops/CosLayer.h
deleted file mode 100644
index 1fadef718..000000000
--- a/runtime/onert/backend/cpu/ops/CosLayer.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in riting, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_COSLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_COSLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-class CosLayer : public ::onert::exec::IFunction
-{
-public:
- CosLayer();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- void cosFloat32();
- void cosQuant8();
-
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_COSLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/DivLayer.cc b/runtime/onert/backend/cpu/ops/DivLayer.cc
deleted file mode 100644
index 556c55e33..000000000
--- a/runtime/onert/backend/cpu/ops/DivLayer.cc
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "DivLayer.h"
-
-#include <cker/operation/BinaryArithmeticOps.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-void DivLayer::divFloat32()
-{
- float output_activation_min = 0, output_activation_max = 0;
- CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.float_activation_max = output_activation_max;
- op_params.float_activation_min = output_activation_min;
-
- const bool requires_broadcast = !HaveSameShapes(_lhs, _rhs);
- if (requires_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::DIV>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
- }
- else
- {
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::DIV>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
- }
-}
-
-void DivLayer::divQuant8()
-{
- int32_t output_activation_min, output_activation_max;
- CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
- &output_activation_max);
- // op_params.quantized_activation_max = output_activation_max;
- // op_params.quantized_activation_min = output_activation_min;
-
- // cker quant8 div is not implemented yet
- throw std::runtime_error{"Div NYI for quantized"};
-}
-
-void DivLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- const ir::Activation activation, IPortableTensor *output)
-{
- _lhs = lhs;
- _rhs = rhs;
- _activation = activation;
- _output = output;
-}
-
-void DivLayer::run()
-{
- if (_output->data_type() == OperandType::FLOAT32)
- {
- divFloat32();
- }
- else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- divQuant8();
- }
- else
- {
- throw std::runtime_error{"Div: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc
new file mode 100644
index 000000000..c1d63172b
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ElementwiseActivationLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/Logistic.h>
+#include <cker/operation/ReLU.h>
+#include <cker/operation/ReLU6.h>
+#include <cker/operation/Tanh.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+ElementwiseActivationLayer::ElementwiseActivationLayer()
+ : _input(nullptr), _output(nullptr), _kernel()
+{
+ // DO NOTHING
+}
+
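+// Builds a 256-entry table mapping each possible uint8 input to its dequantized, activated
+// (tanh or logistic), and requantized output value, so the quantized kernels reduce to a lookup.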
+void ElementwiseActivationLayer::PopulateLookupTable(const ElementwiseActivationType op_type)
+{
+ const auto input_scale = static_cast<double>(_input->data_scale());
+ const auto input_zero_point = static_cast<int32_t>(_input->data_offset());
+ const auto output_scale = static_cast<double>(_output->data_scale());
+ const auto output_zero_point = static_cast<int32_t>(_output->data_offset());
+ const float inverse_scale = 1 / output_scale;
+ int32_t maxval = std::numeric_limits<uint8_t>::max();
+ int32_t minval = std::numeric_limits<uint8_t>::min();
+ for (int32_t val = minval; val <= maxval; ++val)
+ {
+ const float dequantized = input_scale * (val - input_zero_point);
+ float transformed = 0.f;
+ if (op_type == ElementwiseActivationType::kTanh)
+ {
+ transformed = std::tanh(dequantized);
+ }
+ else if (op_type == ElementwiseActivationType::kLogistic)
+ {
+ transformed = 1.0f / (1.0f + std::exp(-dequantized));
+ }
+ else
+ {
+ throw std::runtime_error("ElementwiseActivationLayer : unsupported activation type");
+ }
+ const float rescaled = std::round(transformed * inverse_scale);
+ const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point);
+ _table[val] = static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval));
+ }
+}
+
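+// Applies the precomputed lookup table element-wise to the uint8 input buffer.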
+void ElementwiseActivationLayer::EvalUsingLookupTable(const IPortableTensor *input,
+ IPortableTensor *output)
+{
+ const int size = MatchingFlatSize(getTensorShape(input), getTensorShape(output));
+ const uint8_t *input_data = reinterpret_cast<const uint8_t *>(input->buffer());
+ uint8_t *output_data = reinterpret_cast<uint8_t *>(output->buffer());
+
+ for (int i = 0; i < size; ++i)
+ {
+ output_data[i] = _table[input_data[i]];
+ }
+}
+
+void ElementwiseActivationLayer::configure(const IPortableTensor *input, IPortableTensor *output,
+ float alpha, float beta,
+ ElementwiseActivationType op_type)
+{
+ _input = input;
+ _output = output;
+
+ switch (op_type)
+ {
+ case ElementwiseActivationType::kLogistic:
+ if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ PopulateLookupTable(op_type);
+ _kernel = std::bind(&ElementwiseActivationLayer::EvalUsingLookupTable, this,
+ std::placeholders::_1, std::placeholders::_2);
+ }
+ else if (_input->data_type() == OperandType::FLOAT32)
+ {
+ _kernel = [](const IPortableTensor *input, IPortableTensor *output) {
+ nnfw::cker::Logistic(getTensorShape(input),
+ reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ };
+ }
+ else
+ {
+ throw std::runtime_error{"ElementwiseActivationLayer(Logistic): unsupported data type"};
+ }
+ break;
+ case ElementwiseActivationType::kReLU:
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
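+        // (alpha, beta) describe the clipping range: (inf, 0) selects plain ReLU and (6, 0)
+        // selects ReLU6; any other range is rejected below.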
+ if (alpha == std::numeric_limits<float>::infinity() && beta == 0.f)
+ {
+ _kernel = [](const IPortableTensor *input, IPortableTensor *output) {
+ nnfw::cker::ReLU(getTensorShape(input),
+ reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ };
+ }
+ else if (alpha == 6.f && beta == 0.f)
+ {
+ _kernel = [](const IPortableTensor *input, IPortableTensor *output) {
+ nnfw::cker::ReLU6(getTensorShape(input),
+ reinterpret_cast<const float *>(input->buffer()),
+ reinterpret_cast<float *>(output->buffer()));
+ };
+ }
+ else
+ {
+ throw std::runtime_error(
+ "ElementwiseActivationLayer : This layer suppports only ReLU(0-inf) and ReLU6(0-6)");
+ }
+ }
+ else
+ {
+ throw std::runtime_error{"ElementwiseActivationLayer(ReLU): unsupported data type"};
+ }
+ break;
+ case ElementwiseActivationType::kTanh:
+ if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ PopulateLookupTable(op_type);
+ _kernel = std::bind(&ElementwiseActivationLayer::EvalUsingLookupTable, this,
+ std::placeholders::_1, std::placeholders::_2);
+ }
+ else if (_input->data_type() == OperandType::FLOAT32)
+ {
+ _kernel = [](const IPortableTensor *input, IPortableTensor *output) {
+ nnfw::cker::Tanh(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ };
+ }
+ else
+ {
+ throw std::runtime_error{"ElementwiseActivationLayer(Logistic): unsupported data type"};
+ }
+ break;
+ default:
+ throw std::runtime_error("ElementwiseActivationLayer: unsupported op type");
+ }
+}
+
+void ElementwiseActivationLayer::run() { _kernel(_input, _output); }
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/TanhLayer.h b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h
index 35a184074..3ef580041 100644
--- a/runtime/onert/backend/cpu/ops/TanhLayer.h
+++ b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CPU_OPS_TANHLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_TANHLAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_ELEMENTWISEACTIVATIONLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_ELEMENTWISEACTIVATIONLAYER_H__
#include <backend/IPortableTensor.h>
@@ -30,26 +30,33 @@ namespace cpu
namespace ops
{
-class TanhLayer : public ::onert::exec::IFunction
+enum class ElementwiseActivationType
{
-public:
- TanhLayer();
+ kLogistic,
+ kReLU,
+ kTanh
+};
+class ElementwiseActivationLayer : public ::onert::exec::IFunction
+{
public:
- void tanhFloat32();
+ ElementwiseActivationLayer();
- void tanhQuant8();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
+public:
+ void configure(const IPortableTensor *input, IPortableTensor *output, float alpha, float beta,
+ const ElementwiseActivationType op_type);
void run() override;
- void PopulateLookupTable();
+ void PopulateLookupTable(const ElementwiseActivationType op_type);
+
+ void EvalUsingLookupTable(const IPortableTensor *input, IPortableTensor *output);
private:
const IPortableTensor *_input;
IPortableTensor *_output;
uint8_t _table[256];
+ std::function<void(const IPortableTensor *input, IPortableTensor *output)> _kernel;
};
} // namespace ops
@@ -57,4 +64,4 @@ private:
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CPU_OPS_TANHLAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_ELEMENTWISEACTIVATIONLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc
new file mode 100644
index 000000000..ea3c1e7cd
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ElementwiseBinaryLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/LogicalOr.h>
+#include <cker/operation/MaxMin.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+namespace
+{
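+// File-local wrappers that give every cker call the same (lhs, rhs, output) signature so
+// configure() can store any of them in _kernel.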
+template <typename T>
+void logicalOrGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs,
+ IPortableTensor *output)
+{
+ if (!HaveSameShapes(lhs, rhs))
+ {
+ nnfw::cker::LogicalOrBroadcast<T>(
+ getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), getTensorShape(rhs),
+ reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
+ reinterpret_cast<T *>(output->buffer()));
+ }
+ else
+ {
+ nnfw::cker::LogicalOrElementwise<T>(
+ getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ reinterpret_cast<const T *>(rhs->buffer()), reinterpret_cast<T *>(output->buffer()));
+ }
+}
+
+template <typename T>
+void maximumGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output)
+{
+ nnfw::cker::Max<T>(getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
+ getTensorShape(output), reinterpret_cast<T *>(output->buffer()));
+}
+
+template <typename T>
+void minimumGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output)
+{
+ nnfw::cker::Min<T>(getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
+ getTensorShape(output), reinterpret_cast<T *>(output->buffer()));
+}
+
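+// Quantized Max/Min currently require lhs, rhs, and output to share the same scale and zero-point.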
+bool haveSameQuantInfo(const IPortableTensor *lhs, const IPortableTensor *rhs,
+ const IPortableTensor *output)
+{
+ return (lhs->data_scale() == rhs->data_scale() && lhs->data_scale() == output->data_scale()) &&
+ (lhs->data_offset() == rhs->data_offset() && lhs->data_offset() == output->data_offset());
+}
+} // namespace
+
+void ElementwiseBinaryLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
+ IPortableTensor *output, const ElementwiseBinaryType op_type)
+{
+ assert(lhs != nullptr);
+ assert(rhs != nullptr);
+ assert(output != nullptr);
+
+ _lhs = lhs;
+ _rhs = rhs;
+ _output = output;
+
+ switch (op_type)
+ {
+ case ElementwiseBinaryType::kLogicalOr:
+ if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8))
+ {
+ _kernel = logicalOrGeneric<bool>;
+ }
+ else
+ {
+ throw std::runtime_error{"LogicalOr: Unsupported data type"};
+ }
+ break;
+ case ElementwiseBinaryType::kMax:
+ if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+        if (!haveSameQuantInfo(_lhs, _rhs, _output))
+ {
+ throw std::runtime_error("Max NYI for quantized");
+ }
+ _kernel = maximumGeneric<uint8_t>;
+ }
+ else if (_lhs->data_type() == OperandType::FLOAT32)
+ {
+ _kernel = maximumGeneric<float>;
+ }
+ else
+ {
+ throw std::runtime_error{"Max: unsupported data type"};
+ }
+ break;
+ case ElementwiseBinaryType::kMin:
+ if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+        if (!haveSameQuantInfo(_lhs, _rhs, _output))
+ {
+ throw std::runtime_error("Min NYI for quantized");
+ }
+ _kernel = minimumGeneric<uint8_t>;
+ }
+ else if (_lhs->data_type() == OperandType::INT32)
+ {
+ _kernel = minimumGeneric<int32_t>;
+ }
+ else if (_lhs->data_type() == OperandType::FLOAT32)
+ {
+ _kernel = minimumGeneric<float>;
+ }
+ else
+ {
+ throw std::runtime_error{"Min: unsupported data type"};
+ }
+ break;
+ default:
+ throw std::runtime_error{"ElementwiseBinary: Unsupported ElementwiseBinary type"};
+ }
+}
+
+void ElementwiseBinaryLayer::run() { _kernel(_lhs, _rhs, _output); }
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/MaxLayer.h b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.h
index ed8dc5b0f..052747a4c 100644
--- a/runtime/onert/backend/cpu/ops/MaxLayer.h
+++ b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CPU_OPS_MAXLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_MAXLAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_ELEMENTWISEBINARYLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_ELEMENTWISEBINARYLAYER_H__
#include <backend/IPortableTensor.h>
@@ -30,20 +30,25 @@ namespace cpu
namespace ops
{
-class MaxLayer : public ::onert::exec::IFunction
+enum class ElementwiseBinaryType
+{
+ kLogicalAnd,
+ kLogicalOr,
+ kMax,
+ kMin,
+};
+
+class ElementwiseBinaryLayer : public ::onert::exec::IFunction
{
public:
- MaxLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
+ ElementwiseBinaryLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
{
// DO NOTHING
}
public:
- template <typename T> void maximum();
-
- void maxQuant8();
-
- void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output);
+ void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output,
+ const ElementwiseBinaryType op_type);
void run() override;
@@ -51,6 +56,7 @@ private:
const IPortableTensor *_lhs;
const IPortableTensor *_rhs;
IPortableTensor *_output;
+ std::function<void(const IPortableTensor *, const IPortableTensor *, IPortableTensor *)> _kernel;
};
} // namespace ops
@@ -58,4 +64,4 @@ private:
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CPU_OPS_MAXLAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_ELEMENTWISEBINARYLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc
new file mode 100644
index 000000000..f8f89ab15
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc
@@ -0,0 +1,336 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ElementwiseUnaryLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/Elementwise.h>
+#include <cker/operation/Erf.h>
+#include <cker/operation/Exp.h>
+#include <cker/operation/LogicalNot.h>
+#include <cker/operation/Quantize.h>
+#include <cker/operation/Round.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+namespace
+{
+void absFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Abs(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
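+// Casts num_elements values from FromT to the requested output type through the DataPtr union
+// of typed pointers.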
+template <typename FromT>
+void castPtr(const FromT *in, DataPtr out, int num_elements, ir::DataType data_type_out)
+{
+ switch (data_type_out)
+ {
+ case ir::DataType::FLOAT32:
+ std::transform(in, in + num_elements, out.f, [](FromT a) { return static_cast<float>(a); });
+ return;
+ case ir::DataType::INT32:
+ std::transform(in, in + num_elements, out.i32,
+ [](FromT a) { return static_cast<int32_t>(a); });
+ return;
+ case ir::DataType::UINT32:
+ std::transform(in, in + num_elements, out.u32,
+ [](FromT a) { return static_cast<uint32_t>(a); });
+ return;
+ case ir::DataType::UINT8:
+ std::transform(in, in + num_elements, out.u8,
+ [](FromT a) { return static_cast<uint8_t>(a); });
+ return;
+ case ir::DataType::BOOL8:
+ std::transform(in, in + num_elements, out.b, [](FromT a) { return static_cast<bool>(a); });
+ return;
+ case ir::DataType::INT64:
+ std::transform(in, in + num_elements, out.i64,
+ [](FromT a) { return static_cast<int64_t>(a); });
+ return;
+ default:
+ throw std::runtime_error("Cast: Not supported output type" +
+ std::to_string((int)data_type_out));
+ }
+}
+
+void cast(const IPortableTensor *input, IPortableTensor *output)
+{
+ auto input_buf = input->buffer();
+ auto output_buf = output->buffer();
+ const auto in = *reinterpret_cast<const DataPtr *>(&input_buf);
+ auto out = *reinterpret_cast<DataPtr *>(&output_buf);
+
+ auto input_shape = getTensorShape(input);
+ auto output_shape = getTensorShape(output);
+ const auto num_elements = MatchingFlatSize(input_shape, output_shape);
+
+ switch (input->data_type())
+ {
+ case ir::DataType::FLOAT32:
+ castPtr(in.f, out, num_elements, output->data_type());
+ return;
+ case ir::DataType::INT32:
+ castPtr(in.i32, out, num_elements, output->data_type());
+ return;
+ case ir::DataType::UINT32:
+ castPtr(in.u32, out, num_elements, output->data_type());
+ return;
+ case ir::DataType::UINT8:
+ castPtr(in.u8, out, num_elements, output->data_type());
+ return;
+ case ir::DataType::BOOL8:
+ castPtr(in.b, out, num_elements, output->data_type());
+ return;
+ case ir::DataType::INT64:
+ castPtr(in.i64, out, num_elements, output->data_type());
+ return;
+ default:
+ throw std::runtime_error("Cast: unsupported data type" +
+ std::to_string((int)input->data_type()));
+ }
+}
+
+void cosFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Cos(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void expFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Exp(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void erfFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Erf(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void logFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Log(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void logicalNot(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::LogicalNot(getTensorShape(input), reinterpret_cast<const bool *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
+}
+
+void negFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Neg(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
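+// Quantizes the input element-wise using the output tensor's scale and zero-point.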
+template <typename InputT, typename OutputT>
+void affineQuantize(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Quantize(getTensorShape(input), reinterpret_cast<const InputT *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<OutputT *>(output->buffer()),
+ output->data_scale(), output->data_offset());
+}
+
+void roundFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Round(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void rsqrtFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Rsqrt(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void sinFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Sin(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+template <typename T> void zerosLikeGeneric(const IPortableTensor *input, IPortableTensor *output)
+{
+ if (!HaveSameShapes(input, output))
+ throw std::runtime_error{"ZerosLike: input and output shape don't match."};
+
+ auto element_size = getTensorShape(input).FlatSize();
+
+ memset(reinterpret_cast<T *>(output->buffer()), 0, element_size * sizeof(T));
+}
+} // namespace
+
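+// Picks the file-local kernel matching the op type and input data type; run() then simply
+// forwards the stored tensors to it.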
+void ElementwiseUnaryLayer::configure(const IPortableTensor *input, IPortableTensor *output,
+ const ElementwiseUnaryType op_type)
+{
+ assert(input != nullptr);
+ assert(output != nullptr);
+
+ _input = input;
+ _output = output;
+
+ switch (op_type)
+ {
+ case ElementwiseUnaryType::kAbs:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = absFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Abs: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kCast:
+ _kernel = cast;
+ break;
+ case ElementwiseUnaryType::kCos:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = cosFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Cos: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kExp:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = expFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Exp: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kErf:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = erfFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Exp: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kLog:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = logFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Log: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kLogicalNot:
+ if ((input->data_type() == OperandType::BOOL8))
+ {
+ _kernel = logicalNot;
+ }
+ else
+ {
+ throw std::runtime_error{"LogicalNot: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kNeg:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = negFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Neg: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kQuantize:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = affineQuantize<float, uint8_t>;
+ }
+ else
+ {
+ throw std::runtime_error{"Quantize: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kRound:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = roundFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Round: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kRSqrt:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = rsqrtFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"RSqrt: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kSin:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = sinFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Sin: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kZerosLike:
+ if (input->data_type() == OperandType::FLOAT32)
+ {
+        _kernel = zerosLikeGeneric<float>;
+ }
+ else if (input->data_type() == OperandType::INT32)
+ {
+        _kernel = zerosLikeGeneric<int32_t>;
+ }
+ else
+ {
+ throw std::runtime_error{"ZerosLike: Unsupported data type"};
+ }
+ break;
+ default:
+ throw std::runtime_error{"ElementwiseBinary: Unsupported ElementwiseBinary type"};
+ }
+}
+
+void ElementwiseUnaryLayer::run() { _kernel(_input, _output); }
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/ReLU6Layer.h b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h
index 994d17a30..74968386d 100644
--- a/runtime/onert/backend/cpu/ops/ReLU6Layer.h
+++ b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_ELEMENTWISEUNARYLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_ELEMENTWISEUNARYLAYER_H__
#include <backend/IPortableTensor.h>
@@ -30,23 +30,41 @@ namespace cpu
namespace ops
{
-class ReLU6Layer : public ::onert::exec::IFunction
+enum class ElementwiseUnaryType
{
-public:
- ReLU6Layer();
+ kAbs,
+ kCast,
+ kCos,
+ kErf,
+ kExp,
+ kLog,
+ kLogicalNot,
+ kNeg,
+ kQuantize,
+ kRound,
+ kRSqrt,
+ kSin,
+ kZerosLike
+};
+class ElementwiseUnaryLayer : public ::onert::exec::IFunction
+{
public:
- void relu6Float32();
+ ElementwiseUnaryLayer() : _input(nullptr), _output(nullptr), _kernel()
+ {
+ // DO NOTHING
+ }
- void relu6Quant8();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
+public:
+ void configure(const IPortableTensor *input, IPortableTensor *output,
+ const ElementwiseUnaryType op_type);
void run() override;
private:
const IPortableTensor *_input;
IPortableTensor *_output;
+ std::function<void(const IPortableTensor *, IPortableTensor *)> _kernel;
};
} // namespace ops
@@ -54,4 +72,4 @@ private:
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_ELEMENTWISEUNARYLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/ExpLayer.cc b/runtime/onert/backend/cpu/ops/ExpLayer.cc
deleted file mode 100644
index 4dbec9cd5..000000000
--- a/runtime/onert/backend/cpu/ops/ExpLayer.cc
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ExpLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Exp.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-ExpLayer::ExpLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void ExpLayer::expFloat32()
-{
- nnfw::cker::Exp(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void ExpLayer::expQuant8()
-{
- // cker quant8 exp is not implemented yet
- throw std::runtime_error{"NYI"};
-}
-
-void ExpLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void ExpLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- expFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- expQuant8();
- }
- else
- {
- throw std::runtime_error{"Exp: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/LogLayer.cc b/runtime/onert/backend/cpu/ops/LogLayer.cc
deleted file mode 100644
index 307c15bc4..000000000
--- a/runtime/onert/backend/cpu/ops/LogLayer.cc
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "LogLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Elementwise.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-LogLayer::LogLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void LogLayer::logFloat32()
-{
- nnfw::cker::Log(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void LogLayer::logQuant8() { throw std::runtime_error{"NYI"}; }
-
-void LogLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void LogLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- logFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- logQuant8();
- }
- else
- {
- throw std::runtime_error{"Log: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/LogLayer.h b/runtime/onert/backend/cpu/ops/LogLayer.h
deleted file mode 100644
index 2f6b4b570..000000000
--- a/runtime/onert/backend/cpu/ops/LogLayer.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in riting, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_LOGLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_LOGLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class LogLayer : public ::onert::exec::IFunction
-{
-public:
- LogLayer();
-
-public:
- void logFloat32();
-
- void logQuant8();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_LOGLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc
index 06dde4fc4..1d7ee6caa 100644
--- a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc
+++ b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc
@@ -34,6 +34,16 @@ LogSoftMaxLayer::LogSoftMaxLayer() : _input(nullptr), _output(nullptr), _beta(0.
// DO NOTHING
}
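+// Precomputes exp(beta * input_scale * (v - 255)) for every possible uint8 value v, so the
+// quantized LogSoftmax kernel can use table lookups instead of per-element expf calls.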
+void LogSoftMaxLayer::PopulateLookupTable(const float kBeta)
+{
+ const float scale = -_input->data_scale() * kBeta;
+ const int32_t max_uint8 = std::numeric_limits<uint8_t>::max();
+ for (int32_t val = 0; val <= max_uint8; ++val)
+ {
+ _table[max_uint8 - val] = expf(scale * val);
+ }
+}
+
void LogSoftMaxLayer::logsoftmaxFloat32()
{
nnfw::cker::SoftmaxParams op_params;
@@ -46,7 +56,15 @@ void LogSoftMaxLayer::logsoftmaxFloat32()
void LogSoftMaxLayer::logsoftmaxQuant8()
{
- // NYI
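+  // Table-driven quantized LogSoftmax: hand the precomputed exp table plus the output scale and
+  // zero-point to the cker kernel.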
+ nnfw::cker::SoftmaxParams op_params;
+ op_params.beta = _beta;
+ op_params.axis = _axis;
+ op_params.table = _table;
+ op_params.zero_point = _output->data_offset();
+ op_params.scale = _output->data_scale();
+ nnfw::cker::LogSoftmax(op_params, _input->data_scale(), getTensorShape(_input),
+ reinterpret_cast<const uint8_t *>(_input->buffer()),
+ getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
}
void LogSoftMaxLayer::configure(const IPortableTensor *input, const float beta, const int axis,
@@ -56,6 +74,10 @@ void LogSoftMaxLayer::configure(const IPortableTensor *input, const float beta,
_output = output;
_beta = beta;
_axis = axis;
+ if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ PopulateLookupTable(_beta);
+ }
}
void LogSoftMaxLayer::run()
@@ -66,7 +88,7 @@ void LogSoftMaxLayer::run()
}
else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
{
- throw std::runtime_error{"LogSoftmax : NYI"};
+ logsoftmaxQuant8();
}
else
{
diff --git a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h
index ba9deca17..1533f3361 100644
--- a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h
+++ b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h
@@ -45,12 +45,15 @@ public:
void run();
+ void PopulateLookupTable(const float kBeta);
+
private:
const IPortableTensor *_input;
IPortableTensor *_output;
float _beta;
int _axis;
+ float _table[256];
};
} // namespace ops
diff --git a/runtime/onert/backend/cpu/ops/LogicalNotLayer.cc b/runtime/onert/backend/cpu/ops/LogicalNotLayer.cc
deleted file mode 100644
index f2192c148..000000000
--- a/runtime/onert/backend/cpu/ops/LogicalNotLayer.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "LogicalNotLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/LogicalNot.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-LogicalNotLayer::LogicalNotLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void LogicalNotLayer::logicalNotBool8()
-{
- nnfw::cker::LogicalNot(getTensorShape(_input), reinterpret_cast<const bool *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<bool *>(_output->buffer()));
-}
-
-void LogicalNotLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void LogicalNotLayer::run()
-{
- if (_input->data_type() == OperandType::BOOL8)
- {
- logicalNotBool8();
- }
- else
- {
- throw std::runtime_error{"LogicalNot: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/LogicalNotLayer.h b/runtime/onert/backend/cpu/ops/LogicalNotLayer.h
deleted file mode 100644
index 5543cca3d..000000000
--- a/runtime/onert/backend/cpu/ops/LogicalNotLayer.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in riting, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_LOGICALNOTLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_LOGICALNOTLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class LogicalNotLayer : public ::onert::exec::IFunction
-{
-public:
- LogicalNotLayer();
-
-public:
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- void logicalNotBool8();
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_LOGICALNOTLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/LogicalOrLayer.cc b/runtime/onert/backend/cpu/ops/LogicalOrLayer.cc
deleted file mode 100644
index 5b7c9f6f0..000000000
--- a/runtime/onert/backend/cpu/ops/LogicalOrLayer.cc
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "LogicalOrLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/LogicalOr.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-void LogicalOrLayer::lorBool8()
-{
- if (!HaveSameShapes(_lhs, _rhs))
- {
- nnfw::cker::LogicalOrBroadcast<bool>(
- getTensorShape(_lhs), reinterpret_cast<const bool *>(_lhs->buffer()), getTensorShape(_rhs),
- reinterpret_cast<const bool *>(_rhs->buffer()), getTensorShape(_output),
- reinterpret_cast<bool *>(_output->buffer()));
- }
- else
- {
- nnfw::cker::LogicalOrElementwise<bool>(getTensorShape(_lhs),
- reinterpret_cast<const bool *>(_lhs->buffer()),
- reinterpret_cast<const bool *>(_rhs->buffer()),
- reinterpret_cast<bool *>(_output->buffer()));
- }
-}
-
-void LogicalOrLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- IPortableTensor *output)
-{
- assert(lhs != nullptr);
- assert(rhs != nullptr);
- assert(output != nullptr);
-
- _lhs = lhs;
- _rhs = rhs;
- _output = output;
-}
-
-void LogicalOrLayer::run()
-{
- if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8))
- {
- lorBool8();
- }
- else
- {
- throw std::runtime_error{"LogicalOr: Unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/LogicalOrLayer.h b/runtime/onert/backend/cpu/ops/LogicalOrLayer.h
deleted file mode 100644
index efaf396e8..000000000
--- a/runtime/onert/backend/cpu/ops/LogicalOrLayer.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in riting, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_LOGICAL_OR_LAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_LOGICAL_OR_LAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-class LogicalOrLayer : public ::onert::exec::IFunction
-{
-public:
- LogicalOrLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
- {
- // Nothing
- }
-
-public:
- void configure(const IPortableTensor *_lhs, const IPortableTensor *_rhs, IPortableTensor *output);
-
- void run() override;
-
-private:
- void lorBool8();
-
-private:
- const IPortableTensor *_lhs;
- const IPortableTensor *_rhs;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_LOGICAL_OR_LAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/LogisticLayer.cc b/runtime/onert/backend/cpu/ops/LogisticLayer.cc
deleted file mode 100644
index 140ab4d2c..000000000
--- a/runtime/onert/backend/cpu/ops/LogisticLayer.cc
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "LogisticLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Logistic.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-LogisticLayer::LogisticLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void LogisticLayer::populateLookupTable()
-{
- const auto input_scale = static_cast<double>(_input->data_scale());
- const auto input_zero_point = static_cast<int32_t>(_input->data_offset());
- const auto output_scale = static_cast<double>(_output->data_scale());
- const auto output_zero_point = static_cast<int32_t>(_output->data_offset());
- const float inverse_scale = 1 / output_scale;
- int32_t maxval = std::numeric_limits<uint8_t>::max();
- int32_t minval = std::numeric_limits<uint8_t>::min();
- for (int32_t val = minval; val <= maxval; ++val)
- {
- const float dequantized = input_scale * (val - input_zero_point);
- const float transformed = 1.0f / (1.0f + std::exp(-dequantized));
- const float rescaled = std::round(transformed * inverse_scale);
- const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point);
- _table[val] = static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval));
- }
-}
-
-void LogisticLayer::logisticFloat32()
-{
- nnfw::cker::Logistic(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void LogisticLayer::logisticQuant8()
-{
- const int size = MatchingFlatSize(getTensorShape(_input), getTensorShape(_output));
- const uint8_t *input_data = reinterpret_cast<const uint8_t *>(_input->buffer());
- uint8_t *output_data = reinterpret_cast<uint8_t *>(_output->buffer());
-
- for (int i = 0; i < size; ++i)
- {
- output_data[i] = _table[input_data[i]];
- }
-}
-
-void LogisticLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-
- if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- if (_output->data_scale() != 1.f / 256)
- {
- throw std::runtime_error{"incorrect scale for output"};
- }
- populateLookupTable();
- }
-}
-
-void LogisticLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- logisticFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- logisticQuant8();
- }
- else
- {
- throw std::runtime_error{"Logistic: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/LogisticLayer.h b/runtime/onert/backend/cpu/ops/LogisticLayer.h
deleted file mode 100644
index cac77939d..000000000
--- a/runtime/onert/backend/cpu/ops/LogisticLayer.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_LOGISTICLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_LOGISTICLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class LogisticLayer : public ::onert::exec::IFunction
-{
-public:
- LogisticLayer();
-
-public:
- void logisticFloat32();
-
- void logisticQuant8();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
- void populateLookupTable();
-
- void run() override;
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-
- uint8_t _table[256];
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_LOGISTICLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/MaxLayer.cc b/runtime/onert/backend/cpu/ops/MaxLayer.cc
deleted file mode 100644
index 9631983be..000000000
--- a/runtime/onert/backend/cpu/ops/MaxLayer.cc
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "MaxLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/MaxMin.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-template <typename T> void MaxLayer::maximum()
-{
- nnfw::cker::Max<T>(getTensorShape(_lhs), reinterpret_cast<const T *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const T *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()));
-}
-
-void MaxLayer::maxQuant8()
-{
- if (_lhs->data_scale() == _rhs->data_scale() && _lhs->data_scale() == _output->data_scale())
- {
- if (_lhs->data_offset() == _rhs->data_offset() && _lhs->data_offset() == _output->data_offset())
- {
- return nnfw::cker::Max<uint8_t>(
- getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
- }
- }
- throw std::runtime_error("Max NYI for quantized");
-}
-
-void MaxLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- IPortableTensor *output)
-{
- assert(lhs != nullptr);
- assert(rhs != nullptr);
- assert(output != nullptr);
-
- _lhs = lhs;
- _rhs = rhs;
- _output = output;
-}
-
-void MaxLayer::run()
-{
- if (_lhs->data_type() == OperandType::FLOAT32)
- {
- maximum<float>();
- }
- else if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- maxQuant8();
- }
- else
- {
- throw std::runtime_error{"Max: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/MaxPoolLayer.cc b/runtime/onert/backend/cpu/ops/MaxPoolLayer.cc
deleted file mode 100644
index 1e983b408..000000000
--- a/runtime/onert/backend/cpu/ops/MaxPoolLayer.cc
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "MaxPoolLayer.h"
-
-#include <cker/operation/MaxPool.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-#define MAXPOOLING_PARAMETERS \
- nnfw::cker::PoolParams op_params; \
- op_params.stride_height = _strideHeight; \
- op_params.stride_width = _strideWidth; \
- op_params.filter_height = _kernelHeight; \
- op_params.filter_width = _kernelWidth; \
- op_params.padding_values.height = (int8_t)_paddingTop; \
- op_params.padding_values.width = (int8_t)_paddingLeft;
-
-MaxPoolLayer::MaxPoolLayer()
- : _input(nullptr), _output(nullptr), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
- _paddingBottom(0), _strideWidth(0), _strideHeight(0), _kernelWidth(0), _kernelHeight(0),
- _activation(ir::Activation::NONE)
-{
- // DO NOTHING
-}
-
-void MaxPoolLayer::maxPoolFloat32()
-{
- MAXPOOLING_PARAMETERS
- float output_activation_min = 0, output_activation_max = 0;
- CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
- op_params.float_activation_min = output_activation_min;
- op_params.float_activation_max = output_activation_max;
-
- nnfw::cker::MaxPool(op_params, getTensorShape(_input),
- reinterpret_cast<const float *>(_input->buffer()), getTensorShape(_output),
- reinterpret_cast<float *>(_output->buffer()));
-}
-void MaxPoolLayer::maxPoolQuant8()
-{
- MAXPOOLING_PARAMETERS
- int32_t output_activation_min = 0;
- int32_t output_activation_max = 0;
- CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
- &output_activation_max);
- op_params.quantized_activation_min = output_activation_min;
- op_params.quantized_activation_max = output_activation_max;
-
- nnfw::cker::MaxPool(op_params, getTensorShape(_input),
- reinterpret_cast<const uint8_t *>(_input->buffer()), getTensorShape(_output),
- reinterpret_cast<uint8_t *>(_output->buffer()));
-}
-
-void MaxPoolLayer::configure(const IPortableTensor *input, const uint32_t paddingLeft,
- const uint32_t paddingRight, const uint32_t paddingTop,
- const uint32_t paddingBottom, const uint32_t strideWidth,
- const uint32_t strideHeight, const uint32_t kernelWidth,
- const uint32_t kernelHeight, const ir::Activation activation,
- IPortableTensor *output)
-{
- _input = input;
- _paddingLeft = paddingLeft;
- _paddingRight = paddingRight;
- _paddingTop = paddingTop;
- _paddingBottom = paddingBottom;
- _strideWidth = strideWidth;
- _strideHeight = strideHeight;
- _kernelWidth = kernelWidth;
- _kernelHeight = kernelHeight;
- _activation = activation;
- _output = output;
-}
-
-void MaxPoolLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- maxPoolFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- maxPoolQuant8();
- }
- else
- {
- throw std::runtime_error{"MaxPool: unsupported data type"};
- }
-}
-
-#undef MAXPOOLING_PARAMETERS
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/MinLayer.cc b/runtime/onert/backend/cpu/ops/MinLayer.cc
deleted file mode 100644
index 20859673b..000000000
--- a/runtime/onert/backend/cpu/ops/MinLayer.cc
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "MinLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/MaxMin.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-template <typename T> void MinLayer::minimum()
-{
- nnfw::cker::Min<T>(getTensorShape(_lhs), reinterpret_cast<const T *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const T *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()));
-}
-
-void MinLayer::minQuant8()
-{
- if (_lhs->data_scale() == _rhs->data_scale() && _lhs->data_scale() == _output->data_scale())
- {
- if (_lhs->data_offset() == _rhs->data_offset() && _lhs->data_offset() == _output->data_offset())
- {
- return nnfw::cker::Min<uint8_t>(
- getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
- }
- }
- throw std::runtime_error("Min NYI for quantized");
-}
-
-void MinLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- IPortableTensor *output)
-{
- assert(lhs != nullptr);
- assert(rhs != nullptr);
- assert(output != nullptr);
-
- _lhs = lhs;
- _rhs = rhs;
- _output = output;
-}
-
-void MinLayer::run()
-{
- if (_lhs->data_type() == OperandType::FLOAT32)
- {
- minimum<float>();
- }
- else if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- minQuant8();
- }
- else if (_lhs->data_type() == OperandType::INT32)
- {
- minimum<int32_t>();
- }
- else
- {
- throw std::runtime_error{"Min: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/MinLayer.h b/runtime/onert/backend/cpu/ops/MinLayer.h
deleted file mode 100644
index 9bd114e54..000000000
--- a/runtime/onert/backend/cpu/ops/MinLayer.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_MINLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_MINLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class MinLayer : public ::onert::exec::IFunction
-{
-public:
- MinLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
- {
- // DO NOTHING
- }
-
-public:
- template <typename T> void minimum();
-
- void minQuant8();
-
- void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_lhs;
- const IPortableTensor *_rhs;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_MINLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/MulLayer.cc b/runtime/onert/backend/cpu/ops/MulLayer.cc
deleted file mode 100644
index eef73edf3..000000000
--- a/runtime/onert/backend/cpu/ops/MulLayer.cc
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "MulLayer.h"
-
-#include <cker/operation/BinaryArithmeticOps.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-void MulLayer::mulFloat32()
-{
- float output_activation_min = 0, output_activation_max = 0;
- CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.float_activation_max = output_activation_max;
- op_params.float_activation_min = output_activation_min;
-
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
- if (need_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void MulLayer::mulQuant8()
-{
- int32_t output_activation_min, output_activation_max;
- CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
- &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
-
- op_params.quantized_activation_max = output_activation_max;
- op_params.quantized_activation_min = output_activation_min;
- op_params.input1_offset = -_lhs->data_offset();
- op_params.input2_offset = -_rhs->data_offset();
- op_params.output_offset = _output->data_offset();
-
- double real_multiplier = _lhs->data_scale() * _rhs->data_scale() / _output->data_scale();
- QuantizeMultiplier(real_multiplier, &op_params.output_multiplier, &op_params.output_shift);
-
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
- if (need_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
-}
-
-void MulLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- const ir::Activation activation, IPortableTensor *output)
-{
- _lhs = lhs;
- _rhs = rhs;
- _activation = activation;
- _output = output;
-}
-
-void MulLayer::run()
-{
- if (_output->data_type() == OperandType::FLOAT32)
- {
- mulFloat32();
- }
- else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- mulQuant8();
- }
- else
- {
- throw std::runtime_error{"Mul: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/MulLayer.h b/runtime/onert/backend/cpu/ops/MulLayer.h
deleted file mode 100644
index 2c4a98875..000000000
--- a/runtime/onert/backend/cpu/ops/MulLayer.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_MULLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_MULLAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include "OperationUtils.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class MulLayer : public ::onert::exec::IFunction
-{
-public:
- MulLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
- {
- // DO NOTHING
- }
-
-public:
- void mulFloat32();
-
- void mulQuant8();
-
- void configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- const ir::Activation activation, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_lhs;
- const IPortableTensor *_rhs;
- IPortableTensor *_output;
-
- ir::Activation _activation{ir::Activation::NONE};
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_MULLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/NegLayer.cc b/runtime/onert/backend/cpu/ops/NegLayer.cc
deleted file mode 100644
index 2cb95b771..000000000
--- a/runtime/onert/backend/cpu/ops/NegLayer.cc
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "NegLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Elementwise.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-NegLayer::NegLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void NegLayer::negFloat32()
-{
- nnfw::cker::Neg(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void NegLayer::negQuant8() { throw std::runtime_error{"NYI"}; }
-
-void NegLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void NegLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- negFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- negQuant8();
- }
- else
- {
- throw std::runtime_error{"Neg: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/NegLayer.h b/runtime/onert/backend/cpu/ops/NegLayer.h
deleted file mode 100644
index addf84ec2..000000000
--- a/runtime/onert/backend/cpu/ops/NegLayer.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_NEGLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_NEGLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class NegLayer : public ::onert::exec::IFunction
-{
-public:
- NegLayer();
-
-public:
- void negFloat32();
-
- void negQuant8();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_NEGLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/PoolLayer.cc b/runtime/onert/backend/cpu/ops/PoolLayer.cc
new file mode 100644
index 000000000..85d02a751
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/PoolLayer.cc
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PoolLayer.h"
+
+#include <cker/operation/AveragePool.h>
+#include <cker/operation/MaxPool.h>
+
+#include <unordered_map>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+namespace
+{
+template <typename T>
+void avgPool2D(const nnfw::cker::PoolParams &params, const IPortableTensor *input,
+ IPortableTensor *output)
+{
+ nnfw::cker::AveragePool<T>(params, getTensorShape(input),
+ reinterpret_cast<const T *>(input->buffer()), getTensorShape(output),
+ reinterpret_cast<T *>(output->buffer()));
+}
+
+template <typename T>
+void maxPool2D(const nnfw::cker::PoolParams &params, const IPortableTensor *input,
+ IPortableTensor *output)
+{
+ nnfw::cker::MaxPool<T>(params, getTensorShape(input),
+ reinterpret_cast<const T *>(input->buffer()), getTensorShape(output),
+ reinterpret_cast<T *>(output->buffer()));
+}
+
+template <typename T>
+std::function<void(const IPortableTensor *, IPortableTensor *)>
+generateKernelGeneric(const nnfw::cker::PoolParams &params, PoolType op_type)
+{
+ if (op_type == PoolType::kAvg)
+ {
+ return std::bind(&avgPool2D<T>, params, std::placeholders::_1, std::placeholders::_2);
+ }
+ else if (op_type == PoolType::kMax)
+ {
+ return std::bind(&maxPool2D<T>, params, std::placeholders::_1, std::placeholders::_2);
+ }
+ else
+ {
+ throw std::runtime_error{"Pool: unsupported pool type"};
+ }
+}
+} // namespace
+
+PoolLayer::PoolLayer() : _input(nullptr), _output(nullptr), _kernel()
+{
+ // DO NOTHING
+}
+
+#define POOLING_PARAMETERS \
+ nnfw::cker::PoolParams op_params; \
+ op_params.stride_height = strideHeight; \
+ op_params.stride_width = strideWidth; \
+ op_params.filter_height = kernelHeight; \
+ op_params.filter_width = kernelWidth; \
+ op_params.padding_values.height = (int8_t)paddingTop; \
+ op_params.padding_values.width = (int8_t)paddingLeft;
+
+void PoolLayer::configure(const IPortableTensor *input, const uint32_t paddingLeft, const uint32_t,
+ const uint32_t paddingTop, const uint32_t, const uint32_t strideWidth,
+ const uint32_t strideHeight, const uint32_t kernelWidth,
+ const uint32_t kernelHeight, const ir::Activation activation,
+ IPortableTensor *output, const PoolType op_type)
+{
+ assert(input != nullptr);
+ assert(output != nullptr);
+
+ _input = input;
+ _output = output;
+
+ POOLING_PARAMETERS
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ float output_activation_min = 0;
+ float output_activation_max = 0;
+ CalculateActivationRange<float>(activation, &output_activation_min, &output_activation_max);
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+
+ _kernel = generateKernelGeneric<float>(op_params, op_type);
+ }
+ else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeUint8(activation, _output, &output_activation_min,
+ &output_activation_max);
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+ _kernel = generateKernelGeneric<uint8_t>(op_params, op_type);
+ }
+ else
+ {
+ throw std::runtime_error{"Pool: unsupported data type"};
+ }
+}
+
+void PoolLayer::run() { _kernel(_input, _output); }
+
+#undef POOLING_PARAMETERS
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
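
The new PoolLayer.cc above replaces the separate average/max pooling layers with one layer that picks its kernel once in configure() and stores it in a std::function, so run() is a single indirect call. Stripped of the onert/cker types, this is ordinary configure-time binding; the sketch below only illustrates that idea with invented toy names (Tensor, Kernel, makeKernel) and is not code from this change:

    #include <algorithm>
    #include <functional>
    #include <iostream>
    #include <vector>

    // Toy stand-ins for the real tensor/parameter types used in the diff above.
    struct Tensor { std::vector<float> data; };
    enum class PoolType { kAvg, kMax };

    // The chosen callable plays the role of PoolLayer::_kernel.
    using Kernel = std::function<void(const Tensor &, Tensor &)>;

    // "configure()": select the kernel once, based on the pool type.
    Kernel makeKernel(PoolType type)
    {
      if (type == PoolType::kMax)
        return [](const Tensor &in, Tensor &out) {
          out.data.assign(1, *std::max_element(in.data.begin(), in.data.end()));
        };
      // kAvg: mean over all elements (window/stride handling omitted in this toy).
      return [](const Tensor &in, Tensor &out) {
        float sum = 0.0f;
        for (float v : in.data) sum += v;
        out.data.assign(1, sum / static_cast<float>(in.data.size()));
      };
    }

    int main()
    {
      Tensor in{{1.0f, 2.0f, 3.0f, 4.0f}};
      Tensor out;
      Kernel kernel = makeKernel(PoolType::kMax); // configure-time binding
      kernel(in, out);                            // run() is one call
      std::cout << out.data[0] << "\n";           // prints 4
    }

In the real code the same shape appears as generateKernelGeneric<T> returning the bound avgPool2D<T>/maxPool2D<T> helper for the tensor's element type.
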
diff --git a/runtime/onert/backend/cpu/ops/MaxPoolLayer.h b/runtime/onert/backend/cpu/ops/PoolLayer.h
index 4c5109f64..b37835946 100644
--- a/runtime/onert/backend/cpu/ops/MaxPoolLayer.h
+++ b/runtime/onert/backend/cpu/ops/PoolLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CPU_OPS_MAXPOOLLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_MAXPOOLLAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_POOLLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_POOLLAYER_H__
#include <backend/IPortableTensor.h>
#include "OperationUtils.h"
@@ -31,22 +31,25 @@ namespace cpu
namespace ops
{
-class MaxPoolLayer : public ::onert::exec::IFunction
+enum class PoolType
{
-public:
- MaxPoolLayer();
+ kAvg,
+ kL2,
+ kMax,
+};
+class PoolLayer : public ::onert::exec::IFunction
+{
public:
- void maxPoolFloat32();
-
- void maxPoolQuant8();
+ PoolLayer();
+public:
void configure(const IPortableTensor *input, const uint32_t paddingLeft,
const uint32_t paddingRight, const uint32_t paddingTop,
const uint32_t paddingBottom, const uint32_t strideWidth,
const uint32_t strideHeight, const uint32_t kernelWidth,
const uint32_t kernelHeight, const ir::Activation activation,
- IPortableTensor *output);
+ IPortableTensor *output, const PoolType op_type);
void run() override;
@@ -54,17 +57,7 @@ private:
const IPortableTensor *_input;
IPortableTensor *_output;
- uint32_t _paddingLeft;
- uint32_t _paddingTop;
- uint32_t _paddingRight;
- uint32_t _paddingBottom;
-
- uint32_t _strideWidth;
- uint32_t _strideHeight;
- uint32_t _kernelWidth;
- uint32_t _kernelHeight;
-
- ir::Activation _activation;
+ std::function<void(const IPortableTensor *, IPortableTensor *)> _kernel;
};
} // namespace ops
@@ -72,4 +65,4 @@ private:
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CPU_OPS_MAXPOOLLAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_POOLLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/QuantizeLayer.cc b/runtime/onert/backend/cpu/ops/QuantizeLayer.cc
deleted file mode 100644
index 45fc148bf..000000000
--- a/runtime/onert/backend/cpu/ops/QuantizeLayer.cc
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "QuantizeLayer.h"
-
-#include <cker/operation/Quantize.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-QuantizeLayer::QuantizeLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-template <typename InputT, typename OutputT> void QuantizeLayer::affineQuantize()
-{
- nnfw::cker::Quantize(getTensorShape(_input), reinterpret_cast<const InputT *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<OutputT *>(_output->buffer()),
- _output->data_scale(), _output->data_offset());
-}
-
-void QuantizeLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void QuantizeLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- affineQuantize<float, uint8_t>();
- }
- else
- {
- throw std::runtime_error{"Quantize: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/QuantizeLayer.h b/runtime/onert/backend/cpu/ops/QuantizeLayer.h
deleted file mode 100644
index b4e7aca40..000000000
--- a/runtime/onert/backend/cpu/ops/QuantizeLayer.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include "OperationUtils.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class QuantizeLayer : public ::onert::exec::IFunction
-{
-public:
- QuantizeLayer();
-
-public:
- template <typename InputT, typename OutputT> void affineQuantize();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/RoundLayer.cc b/runtime/onert/backend/cpu/ops/RankLayer.cc
index 185d7554e..4690bdf72 100644
--- a/runtime/onert/backend/cpu/ops/RoundLayer.cc
+++ b/runtime/onert/backend/cpu/ops/RankLayer.cc
@@ -14,12 +14,10 @@
* limitations under the License.
*/
-#include "RoundLayer.h"
+#include "RankLayer.h"
#include "OperationUtils.h"
-#include <cker/operation/Round.h>
-
namespace onert
{
namespace backend
@@ -28,32 +26,28 @@ namespace cpu
{
namespace ops
{
-RoundLayer::RoundLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-void RoundLayer::roundFloat32()
+RankLayer::RankLayer() : _input(nullptr), _output(nullptr)
{
- nnfw::cker::Round(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ // DO NOTHING
}
-void RoundLayer::configure(const IPortableTensor *input, IPortableTensor *output)
+void RankLayer::configure(const IPortableTensor *input, IPortableTensor *output)
{
_input = input;
_output = output;
}
-void RoundLayer::run()
+void RankLayer::run()
{
- if (_input->data_type() == OperandType::FLOAT32)
+ if (_input->data_type() == OperandType::FLOAT32 || _input->data_type() == OperandType::INT32)
{
- roundFloat32();
+ int32_t *output_data = reinterpret_cast<int32_t *>(_output->buffer());
+ output_data[0] = _input->num_dimensions();
}
else
{
- throw std::runtime_error{"Round: unsupported data type"};
+    throw std::runtime_error{"Rank: unsupported data type"};
}
}
diff --git a/runtime/onert/backend/cpu/ops/ZerosLikeLayer.h b/runtime/onert/backend/cpu/ops/RankLayer.h
index 054894203..6282ceb07 100644
--- a/runtime/onert/backend/cpu/ops/ZerosLikeLayer.h
+++ b/runtime/onert/backend/cpu/ops/RankLayer.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CPU_OPS_ZEROS_LIKE_LAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_ZEROS_LIKE_LAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_RANKLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_RANKLAYER_H__
#include <backend/IPortableTensor.h>
@@ -29,11 +29,13 @@ namespace cpu
{
namespace ops
{
-class ZerosLikeLayer : public ::onert::exec::IFunction
+
+class RankLayer : public ::onert::exec::IFunction
{
public:
- ZerosLikeLayer();
+ RankLayer();
+public:
void configure(const IPortableTensor *input, IPortableTensor *output);
void run() override;
@@ -48,4 +50,4 @@ private:
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CPU_OPS_ZEROS_LIKE_LAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_RANKLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/ReLU6Layer.cc b/runtime/onert/backend/cpu/ops/ReLU6Layer.cc
deleted file mode 100644
index 26eb35e0d..000000000
--- a/runtime/onert/backend/cpu/ops/ReLU6Layer.cc
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ReLU6Layer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/ReLU6.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-ReLU6Layer::ReLU6Layer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void ReLU6Layer::relu6Float32()
-{
- nnfw::cker::ReLU6(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- reinterpret_cast<float *>(_output->buffer()));
-}
-
-void ReLU6Layer::relu6Quant8()
-{
- // cker quant8 relu is not implemented yet
- throw std::runtime_error{"NYI"};
-}
-
-void ReLU6Layer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void ReLU6Layer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- relu6Float32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- relu6Quant8();
- }
- else
- {
- throw std::runtime_error{"ReLU6: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/ReLULayer.cc b/runtime/onert/backend/cpu/ops/ReLULayer.cc
deleted file mode 100644
index cb4529feb..000000000
--- a/runtime/onert/backend/cpu/ops/ReLULayer.cc
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ReLULayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/ReLU.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-ReLULayer::ReLULayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void ReLULayer::reluFloat32()
-{
- nnfw::cker::ReLU(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void ReLULayer::reluQuant8()
-{
- // cker quant8 relu is not implemented yet
- throw std::runtime_error{"NYI"};
-}
-
-void ReLULayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void ReLULayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- reluFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- reluQuant8();
- }
- else
- {
- throw std::runtime_error{"ReLU: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/ReLULayer.h b/runtime/onert/backend/cpu/ops/ReLULayer.h
deleted file mode 100644
index 4ba2be772..000000000
--- a/runtime/onert/backend/cpu/ops/ReLULayer.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_RELULAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_RELULAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class ReLULayer : public ::onert::exec::IFunction
-{
-public:
- ReLULayer();
-
-public:
- void reluFloat32();
-
- void reluQuant8();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_RELULAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/ReduceLayer.cc b/runtime/onert/backend/cpu/ops/ReduceLayer.cc
index fe22dbed7..bb5f85d60 100644
--- a/runtime/onert/backend/cpu/ops/ReduceLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ReduceLayer.cc
@@ -49,27 +49,31 @@ void evalLogic(const IPortableTensor *input, IPortableTensor *output, const std:
}
template <typename T>
-void evalType(const IPortableTensor *input, IPortableTensor *output, const std::vector<int> &axes,
- bool keep_dims, nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type)
+std::function<void(const IPortableTensor *, IPortableTensor *, const std::vector<int> &)>
+evalType(bool keep_dims, nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type)
{
switch (reduce_type)
{
case ReduceType::kSum:
- return evalLogic<T>(input, output, axes, keep_dims, static_cast<T>(0), reduce_kernel,
- [](const T current, const T in) -> T { return in + current; });
+ return std::bind(&evalLogic<T>, std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3, keep_dims, static_cast<T>(0), reduce_kernel,
+ [](const T current, const T in) -> T { return in + current; });
break;
case ReduceType::kProd:
- return evalLogic<T>(input, output, axes, keep_dims, static_cast<T>(1), reduce_kernel,
- [](const T current, const T in) -> T { return in * current; });
+ return std::bind(&evalLogic<T>, std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3, keep_dims, static_cast<T>(1), reduce_kernel,
+ [](const T current, const T in) -> T { return in * current; });
break;
case ReduceType::kMax:
- return evalLogic<T>(
- input, output, axes, keep_dims, std::numeric_limits<T>::lowest(), reduce_kernel,
+ return std::bind(
+ &evalLogic<T>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+ keep_dims, std::numeric_limits<T>::lowest(), reduce_kernel,
[](const T current, const T in) -> T { return (in > current) ? in : current; });
break;
case ReduceType::kMin:
- return evalLogic<T>(
- input, output, axes, keep_dims, std::numeric_limits<T>::max(), reduce_kernel,
+ return std::bind(
+ &evalLogic<T>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+ keep_dims, std::numeric_limits<T>::max(), reduce_kernel,
[](const T current, const T in) -> T { return (in < current) ? in : current; });
break;
default:
@@ -79,44 +83,44 @@ void evalType(const IPortableTensor *input, IPortableTensor *output, const std::
// Template specialization for bool type
template <>
-void evalType<bool>(const IPortableTensor *input, IPortableTensor *output,
- const std::vector<int> &axes, bool keep_dims, nnfw::cker::Reduce &reduce_kernel,
- ReduceType reduce_type)
+std::function<void(const IPortableTensor *, IPortableTensor *, const std::vector<int> &)>
+evalType<bool>(bool keep_dims, nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type)
{
switch (reduce_type)
{
case ReduceType::kAny:
- return evalLogic<bool>(
- input, output, axes, keep_dims, false, reduce_kernel,
- [](const bool current, const bool in) -> bool { return in || current; });
+ return std::bind(&evalLogic<bool>, std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3, keep_dims, false, reduce_kernel,
+ [](const bool current, const bool in) -> bool { return in || current; });
break;
case ReduceType::kAll:
- return evalLogic<bool>(
- input, output, axes, keep_dims, true, reduce_kernel,
- [](const bool current, const bool in) -> bool { return in && current; });
+ return std::bind(&evalLogic<bool>, std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3, keep_dims, true, reduce_kernel,
+ [](const bool current, const bool in) -> bool { return in && current; });
break;
default:
throw std::runtime_error{"Reduce: Unsupported reduce type"};
}
}
-template <ReduceType reduce_type>
-void evalGeneric(const IPortableTensor *input, IPortableTensor *output,
- const std::vector<int> &axes, bool keep_dims, nnfw::cker::Reduce &reduce_kernel)
+std::function<void(const IPortableTensor *, IPortableTensor *, const std::vector<int> &)>
+generateKernelGeneric(const IPortableTensor *input, bool keep_dims,
+ nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type)
{
switch (input->data_type())
{
case OperandType::FLOAT32:
- return evalType<float>(input, output, axes, keep_dims, reduce_kernel, reduce_type);
+ return evalType<float>(keep_dims, reduce_kernel, reduce_type);
case OperandType::INT32:
- return evalType<int32_t>(input, output, axes, keep_dims, reduce_kernel, reduce_type);
+ return evalType<int32_t>(keep_dims, reduce_kernel, reduce_type);
case OperandType::BOOL8:
- return evalType<bool>(input, output, axes, keep_dims, reduce_kernel, reduce_type);
+ return evalType<bool>(keep_dims, reduce_kernel, reduce_type);
default:
throw std::runtime_error{"Reduce(generic): unsupported data type"};
}
}
+// TODO Refine this function
void evalSumQuantized(const IPortableTensor *input, IPortableTensor *output,
const std::vector<int> &axes, bool keep_dims,
nnfw::cker::Reduce &reduce_kernel)
@@ -146,14 +150,15 @@ void evalSumQuantized(const IPortableTensor *input, IPortableTensor *output,
return;
}
- evalGeneric<ReduceType::kSum>(input, output, axes, keep_dims, reduce_kernel);
+ const auto kernel = generateKernelGeneric(input, keep_dims, reduce_kernel, ReduceType::kSum);
+ kernel(input, output, axes);
}
} // namespace
ReduceLayer::ReduceLayer()
- : _input(nullptr), _axes(nullptr), _output(nullptr), _reduceType(ReduceType::kAny),
- _keep_dims(false), _reduce_kernel(new nnfw::cker::Reduce())
+ : _input(nullptr), _axes(nullptr), _output(nullptr), _reduce_kernel(new nnfw::cker::Reduce()),
+ _kernel()
{
// DO NOTHING
}
@@ -166,43 +171,44 @@ void ReduceLayer::configure(const IPortableTensor *input, const IPortableTensor
_input = input;
_axes = axes;
_output = output;
- _reduceType = reduceType;
- _keep_dims = keep_dims;
-}
-void ReduceLayer::run()
-{
- const auto axes = getReducerAxes(_axes);
- switch (_reduceType)
+ switch (reduceType)
{
case ReduceType::kSum:
if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
{
- evalSumQuantized(_input, _output, axes, _keep_dims, *_reduce_kernel);
+ _kernel = std::bind(&evalSumQuantized, std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3, keep_dims, *_reduce_kernel);
return;
}
- evalGeneric<ReduceType::kSum>(_input, _output, axes, _keep_dims, *_reduce_kernel);
+ _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kSum);
break;
case ReduceType::kProd:
- evalGeneric<ReduceType::kProd>(_input, _output, axes, _keep_dims, *_reduce_kernel);
+ _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kProd);
break;
case ReduceType::kMax:
- evalGeneric<ReduceType::kMax>(_input, _output, axes, _keep_dims, *_reduce_kernel);
+ _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kMax);
break;
case ReduceType::kMin:
- evalGeneric<ReduceType::kMin>(_input, _output, axes, _keep_dims, *_reduce_kernel);
+ _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kMin);
break;
case ReduceType::kAny:
- evalGeneric<ReduceType::kAny>(_input, _output, axes, _keep_dims, *_reduce_kernel);
+ _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kAny);
break;
case ReduceType::kAll:
- evalGeneric<ReduceType::kAll>(_input, _output, axes, _keep_dims, *_reduce_kernel);
+ _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kAll);
break;
default:
throw std::runtime_error{"ReduceSum: Unsupported reduce type"};
}
}
+void ReduceLayer::run()
+{
+ const auto axes = getReducerAxes(_axes);
+ _kernel(_input, _output, axes);
+}
+
} // namespace ops
} // namespace cpu
} // namespace backend
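
The ReduceLayer change above follows the same configure-time binding: evalType<T> no longer performs the reduction, it returns a std::function with keep_dims and the shared nnfw::cker::Reduce object already bound in, and run() only forwards (input, output, axes) to the stored _kernel. A self-contained sketch of that shape, using plain std::vector and invented names (ReduceFn, makeReduceKernel) purely for illustration:

    #include <algorithm>
    #include <functional>
    #include <iostream>
    #include <limits>
    #include <numeric>
    #include <stdexcept>
    #include <vector>

    enum class ReduceType { kSum, kMax };

    // The returned callable plays the role of ReduceLayer::_kernel:
    // it already knows how to reduce; only the data and axes arrive at run time.
    using ReduceFn =
        std::function<int(const std::vector<int> &, const std::vector<int> &)>;

    ReduceFn makeReduceKernel(ReduceType type)
    {
      switch (type)
      {
        case ReduceType::kSum:
          return [](const std::vector<int> &data, const std::vector<int> & /*axes*/) {
            return std::accumulate(data.begin(), data.end(), 0);
          };
        case ReduceType::kMax:
          return [](const std::vector<int> &data, const std::vector<int> & /*axes*/) {
            int best = std::numeric_limits<int>::lowest();
            for (int v : data) best = std::max(best, v);
            return best;
          };
      }
      throw std::runtime_error{"unsupported reduce type"};
    }

    int main()
    {
      ReduceFn kernel = makeReduceKernel(ReduceType::kSum); // configure()
      std::cout << kernel({1, 2, 3}, {0}) << "\n";          // run(), prints 6
    }

The diff itself expresses the binding with std::bind over evalLogic<T>; a lambda capturing the same arguments, as in this sketch, is an equivalent way to write it.
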
diff --git a/runtime/onert/backend/cpu/ops/ReduceLayer.h b/runtime/onert/backend/cpu/ops/ReduceLayer.h
index 8e7bcdb07..332d399bd 100644
--- a/runtime/onert/backend/cpu/ops/ReduceLayer.h
+++ b/runtime/onert/backend/cpu/ops/ReduceLayer.h
@@ -65,10 +65,11 @@ private:
const IPortableTensor *_input;
const IPortableTensor *_axes;
IPortableTensor *_output;
- ReduceType _reduceType;
- bool _keep_dims;
std::unique_ptr<nnfw::cker::Reduce> _reduce_kernel;
+ std::function<void(const IPortableTensor *input, IPortableTensor *output,
+ const std::vector<int> &axes)>
+ _kernel;
};
} // namespace ops
diff --git a/runtime/onert/backend/cpu/ops/RoundLayer.h b/runtime/onert/backend/cpu/ops/RoundLayer.h
deleted file mode 100644
index fc6a46c0d..000000000
--- a/runtime/onert/backend/cpu/ops/RoundLayer.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_ROUNDLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_ROUNDLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-class RoundLayer : public ::onert::exec::IFunction
-{
-public:
- RoundLayer();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- void roundFloat32();
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_ROUNDLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/RsqrtLayer.cc b/runtime/onert/backend/cpu/ops/RsqrtLayer.cc
deleted file mode 100644
index 0bd468f96..000000000
--- a/runtime/onert/backend/cpu/ops/RsqrtLayer.cc
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "RsqrtLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Elementwise.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-RsqrtLayer::RsqrtLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void RsqrtLayer::rsqrtFloat32()
-{
- nnfw::cker::Rsqrt(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void RsqrtLayer::rsqrtQuant8() { throw std::runtime_error{"NYI : QASYMM8 not supported"}; }
-
-void RsqrtLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void RsqrtLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- rsqrtFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- rsqrtQuant8();
- }
- else
- {
- throw std::runtime_error{"Rsqrt: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/RsqrtLayer.h b/runtime/onert/backend/cpu/ops/RsqrtLayer.h
deleted file mode 100644
index 49abbb08d..000000000
--- a/runtime/onert/backend/cpu/ops/RsqrtLayer.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_RSQRTLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_RSQRTLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-class RsqrtLayer : public ::onert::exec::IFunction
-{
-public:
- RsqrtLayer();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- void rsqrtFloat32();
- void rsqrtQuant8();
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_RSQRTLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/SinLayer.cc b/runtime/onert/backend/cpu/ops/SinLayer.cc
deleted file mode 100644
index 2a6b11753..000000000
--- a/runtime/onert/backend/cpu/ops/SinLayer.cc
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "SinLayer.h"
-#include "OperationUtils.h"
-
-#include <cker/operation/Elementwise.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-SinLayer::SinLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void SinLayer::sinFloat32()
-{
- nnfw::cker::Sin(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void SinLayer::sinQuant8() { throw std::runtime_error{"NYI"}; }
-
-void SinLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void SinLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- sinFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- sinQuant8();
- }
- else
- {
- throw std::runtime_error{"Sin: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/SinLayer.h b/runtime/onert/backend/cpu/ops/SinLayer.h
deleted file mode 100644
index 348350f41..000000000
--- a/runtime/onert/backend/cpu/ops/SinLayer.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_SINLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_SINLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-class SinLayer : public ::onert::exec::IFunction
-{
-public:
- SinLayer();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- void sinFloat32();
- void sinQuant8();
-
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_SINLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc b/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc
index 6e2bb584a..095e67abc 100644
--- a/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc
+++ b/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc
@@ -34,55 +34,23 @@ SoftMaxLayer::SoftMaxLayer() : _input(nullptr), _output(nullptr), _beta(0.0)
// DO NOTHING
}
-// Performs softmax along the input of size (input_size * batch_size).
-void Softmax(const float *in, const int input_size, const int batch_size, const float beta,
- float *out)
+void SoftMaxLayer::softmaxFloat32()
{
- assert(input_size > 0);
-
- // For each batch
- for (int b = 0; b < batch_size; b++)
+ if (getNumberOfDimensions(_input) == 1)
{
- // Find the max coeff.
- float max_coeff = in[0];
- for (int i = 1; i < input_size; i++)
- {
- if (in[i] > max_coeff)
- max_coeff = in[i];
- }
-
- // Compute the normalized sum of exps.
- float exp_sum = 0.0;
- for (int i = 0; i < input_size; i++)
- {
- out[i] = std::exp((in[i] - max_coeff) * beta);
- exp_sum += out[i];
- }
-
- // Divide by the sum of exps.
- float reciprocal_sum_exp = 1.f / exp_sum;
- for (int i = 0; i < input_size; i++)
- {
- out[i] *= reciprocal_sum_exp;
- }
-
- // Advance in and out pointers for the next batch.
- in += input_size;
- out += input_size;
+ uint32_t input_size = getNumberOfElements(_input);
+ nnfw::cker::Softmax(reinterpret_cast<const float *>(_input->buffer()), input_size, 1, _beta,
+ reinterpret_cast<float *>(_output->buffer()));
}
-}
-
-void SoftMaxLayer::softmaxFloat32()
-{
- if (getNumberOfDimensions(_input) == 2)
+ else if (getNumberOfDimensions(_input) == 2)
{
uint32_t batch_size = getSizeOfDimension(_input, 0);
if (batch_size == 0)
throw std::runtime_error("batch_size should not be 0");
uint32_t input_size = getNumberOfElements(_input) / batch_size;
- Softmax(reinterpret_cast<const float *>(_input->buffer()), input_size, batch_size, _beta,
- reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::Softmax(reinterpret_cast<const float *>(_input->buffer()), input_size, batch_size,
+ _beta, reinterpret_cast<float *>(_output->buffer()));
}
else if (getNumberOfDimensions(_input) == 4)
{
@@ -94,7 +62,7 @@ void SoftMaxLayer::softmaxFloat32()
}
else
{
- throw std::runtime_error{"only 2D and 4D tensors supported"};
+ throw std::runtime_error{"only 1D, 2D and 4D tensors supported"};
}
}
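For reference, the (input, input_size, batch_size, beta) overload of nnfw::cker::Softmax that replaces the removed local helper computes the usual numerically stable softmax. A minimal standalone sketch of that computation (this is not the cker implementation itself, just the expected math):

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

// Reference softmax over `batch_size` rows of `input_size` elements each: subtract the
// row max for numerical stability, exponentiate with a beta scale, then normalize.
void ReferenceSoftmax(const float *in, int input_size, int batch_size, float beta, float *out)
{
  for (int b = 0; b < batch_size; ++b)
  {
    const float max_coeff = *std::max_element(in, in + input_size);
    float exp_sum = 0.0f;
    for (int i = 0; i < input_size; ++i)
    {
      out[i] = std::exp((in[i] - max_coeff) * beta);
      exp_sum += out[i];
    }
    for (int i = 0; i < input_size; ++i)
      out[i] /= exp_sum;
    in += input_size;
    out += input_size;
  }
}

int main()
{
  std::vector<float> in{1.f, 2.f, 3.f, 4.f};
  std::vector<float> out(4);
  ReferenceSoftmax(in.data(), /*input_size=*/4, /*batch_size=*/1, /*beta=*/1.0f, out.data());
  for (float v : out)
    std::printf("%f\n", v); // the four values sum to 1.0
  return 0;
}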
diff --git a/runtime/onert/backend/cpu/ops/SubLayer.cc b/runtime/onert/backend/cpu/ops/SubLayer.cc
deleted file mode 100644
index 597d52952..000000000
--- a/runtime/onert/backend/cpu/ops/SubLayer.cc
+++ /dev/null
@@ -1,162 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "SubLayer.h"
-
-#include <cker/operation/BinaryArithmeticOps.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-void SubLayer::subFloat32()
-{
- float output_activation_min = 0, output_activation_max = 0;
- CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.float_activation_max = output_activation_max;
- op_params.float_activation_min = output_activation_min;
-
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
- if (need_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void SubLayer::subInt32()
-{
- int32_t output_activation_min = 0, output_activation_max = 0;
- CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.quantized_activation_max = output_activation_max;
- op_params.quantized_activation_min = output_activation_min;
-
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
- if (need_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer()));
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer()));
-}
-
-void SubLayer::subQuant8()
-{
- int32_t output_activation_min, output_activation_max;
- CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
- &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.quantized_activation_max = output_activation_max;
- op_params.quantized_activation_min = output_activation_min;
- // Parameters for scaled quantized computation
- op_params.left_shift = 20;
- // Zero-points of input and output tensors
- op_params.input1_offset = -_lhs->data_offset();
- op_params.input2_offset = -_rhs->data_offset();
- op_params.output_offset = _output->data_offset();
- assert((op_params.input1_offset >= 0) && (op_params.input1_offset <= 255));
- assert((op_params.input2_offset >= 0) && (op_params.input2_offset <= 255));
- assert((op_params.output_offset >= 0) && (op_params.output_offset <= 255));
-
- // Compute normalized scale for _lhs and _rhs values,
- // and represent in 32-bit fixed point
- const double norm_max_scale = 2 * std::max(_lhs->data_scale(), _rhs->data_scale());
- const double real_lhs_scale = _lhs->data_scale() / norm_max_scale;
- const double real_rhs_scale = _rhs->data_scale() / norm_max_scale;
- // output scale is used to normalize final result, so we invert the scale here
- const double real_output_scale =
- norm_max_scale / (_output->data_scale() * (1 << op_params.left_shift));
-
- // Represent the scales as fixed int32_t multipliers, and int32_t shifts
- QuantizeMultiplier(real_lhs_scale, &op_params.input1_multiplier, &op_params.input1_shift);
- QuantizeMultiplier(real_rhs_scale, &op_params.input2_multiplier, &op_params.input2_shift);
- op_params.input2_multiplier *= -1;
- QuantizeMultiplier(real_output_scale, &op_params.output_multiplier, &op_params.output_shift);
-
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
- if (need_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
-}
-
-void SubLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- const ir::Activation activation, IPortableTensor *output)
-{
- _lhs = lhs;
- _rhs = rhs;
- _activation = activation;
- _output = output;
-}
-
-void SubLayer::run()
-{
- if (_output->data_type() == OperandType::FLOAT32)
- {
- subFloat32();
- }
- else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- subQuant8();
- }
- else if (_output->data_type() == OperandType::INT32)
- {
- subInt32();
- }
- else
- {
- throw std::runtime_error{"Sub: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
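The quantized path above normalizes both input scales against twice their maximum, folds the output scale and the left shift of 20 into a single rescale factor, and converts each real scale into a fixed-point multiplier/shift pair. A standalone sketch of that arithmetic; the QuantizeMultiplier below is the conventional frexp-based conversion and the scales are made-up example values, so it may differ in detail from onert's helper:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Convert a positive real multiplier into a Q31 fixed-point multiplier plus a
// power-of-two shift, as commonly done for quantized arithmetic kernels.
void QuantizeMultiplier(double real_multiplier, int32_t *quantized_multiplier, int *shift)
{
  if (real_multiplier == 0.0)
  {
    *quantized_multiplier = 0;
    *shift = 0;
    return;
  }
  const double q = std::frexp(real_multiplier, shift); // real = q * 2^shift, q in [0.5, 1)
  int64_t q_fixed = static_cast<int64_t>(std::round(q * (1LL << 31)));
  if (q_fixed == (1LL << 31)) // rounding may push q up to exactly 1.0
  {
    q_fixed /= 2;
    ++*shift;
  }
  *quantized_multiplier = static_cast<int32_t>(q_fixed);
}

int main()
{
  const double lhs_scale = 0.02, rhs_scale = 0.05, output_scale = 0.1; // example values
  const int left_shift = 20;

  const double norm_max_scale = 2 * std::max(lhs_scale, rhs_scale);
  const double real_lhs_scale = lhs_scale / norm_max_scale;
  // The output scale normalizes the final result, so it is inverted here.
  const double real_output_scale = norm_max_scale / (output_scale * (1 << left_shift));

  int32_t multiplier;
  int shift;
  QuantizeMultiplier(real_lhs_scale, &multiplier, &shift);
  std::printf("lhs: multiplier=%d shift=%d\n", multiplier, shift);
  QuantizeMultiplier(real_output_scale, &multiplier, &shift);
  std::printf("out: multiplier=%d shift=%d\n", multiplier, shift);
  return 0;
}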
diff --git a/runtime/onert/backend/cpu/ops/SubLayer.h b/runtime/onert/backend/cpu/ops/SubLayer.h
deleted file mode 100644
index 86f32ca6d..000000000
--- a/runtime/onert/backend/cpu/ops/SubLayer.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_SUBLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_SUBLAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include "OperationUtils.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class SubLayer : public ::onert::exec::IFunction
-{
-public:
- SubLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
- {
- // DO NOTHING
- }
-
-public:
- void subFloat32();
-
- void subQuant8();
-
- void subInt32();
-
- void configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- const ir::Activation activation, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_lhs;
- const IPortableTensor *_rhs;
- IPortableTensor *_output;
-
- ir::Activation _activation{ir::Activation::NONE};
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_SUBLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/TanhLayer.cc b/runtime/onert/backend/cpu/ops/TanhLayer.cc
deleted file mode 100644
index 910ac1f41..000000000
--- a/runtime/onert/backend/cpu/ops/TanhLayer.cc
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TanhLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Tanh.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-TanhLayer::TanhLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void TanhLayer::PopulateLookupTable()
-{
- const auto input_scale = static_cast<double>(_input->data_scale());
- const auto input_zero_point = static_cast<int32_t>(_input->data_offset());
- const auto output_scale = static_cast<double>(_output->data_scale());
- const auto output_zero_point = static_cast<int32_t>(_output->data_offset());
- const float inverse_scale = 1 / output_scale;
- int32_t maxval = std::numeric_limits<uint8_t>::max();
- int32_t minval = std::numeric_limits<uint8_t>::min();
- for (int32_t val = minval; val <= maxval; ++val)
- {
- const float dequantized = input_scale * (val - input_zero_point);
- const float transformed = std::tanh(dequantized);
- const float rescaled = std::round(transformed * inverse_scale);
- const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point);
- _table[val] = static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval));
- }
-}
-
-void TanhLayer::tanhFloat32()
-{
- nnfw::cker::Tanh(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void TanhLayer::tanhQuant8()
-{
- const int size = MatchingFlatSize(getTensorShape(_input), getTensorShape(_output));
- const uint8_t *input_data = reinterpret_cast<const uint8_t *>(_input->buffer());
- uint8_t *output_data = reinterpret_cast<uint8_t *>(_output->buffer());
-
- for (int i = 0; i < size; ++i)
- {
- output_data[i] = _table[input_data[i]];
- }
-}
-
-void TanhLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
- if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- PopulateLookupTable();
- }
-}
-
-void TanhLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- tanhFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- tanhQuant8();
- }
- else
- {
- throw std::runtime_error{"Tanh: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
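The uint8 path above precomputes a 256-entry lookup table at configure() time so that run() reduces to a single table lookup per element. A standalone sketch of the same idea; the quantization parameters are made-up example values:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

int main()
{
  // Example (made-up) asymmetric uint8 quantization parameters.
  const double input_scale = 0.05, output_scale = 2.0 / 256;
  const int32_t input_zero_point = 128, output_zero_point = 128;

  uint8_t table[256];
  for (int32_t val = 0; val <= 255; ++val)
  {
    const float dequantized = static_cast<float>(input_scale * (val - input_zero_point));
    const float transformed = std::tanh(dequantized);
    const float rescaled = std::round(transformed / static_cast<float>(output_scale));
    const int32_t quantized = static_cast<int32_t>(rescaled) + output_zero_point;
    table[val] = static_cast<uint8_t>(std::min<int32_t>(255, std::max<int32_t>(0, quantized)));
  }

  // At run time the quantized op body is simply: output[i] = table[input[i]];
  std::printf("tanh(q=200) -> q=%u\n", table[200]);
  return 0;
}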
diff --git a/runtime/onert/backend/cpu/ops/ZerosLikeLayer.cc b/runtime/onert/backend/cpu/ops/ZerosLikeLayer.cc
deleted file mode 100644
index ae8084518..000000000
--- a/runtime/onert/backend/cpu/ops/ZerosLikeLayer.cc
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ZerosLikeLayer.h"
-
-#include "OperationUtils.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-ZerosLikeLayer::ZerosLikeLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void ZerosLikeLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void ZerosLikeLayer::run()
-{
- if (!HaveSameShapes(_input, _output))
- throw std::runtime_error{"ZerosLike: input and output shape don't match."};
-
- auto element_size = getTensorShape(_input).FlatSize();
-
- switch (_input->data_type())
- {
- case OperandType::FLOAT32:
- memset(reinterpret_cast<float *>(_output->buffer()), 0, element_size * sizeof(float));
- break;
- case OperandType::INT32:
- memset(reinterpret_cast<int32_t *>(_output->buffer()), 0, element_size * sizeof(int32_t));
- break;
- default:
- throw std::runtime_error{"ZerosLike: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/core/include/backend/BackendContext.h b/runtime/onert/core/include/backend/BackendContext.h
index c263aef2b..1eba29550 100644
--- a/runtime/onert/core/include/backend/BackendContext.h
+++ b/runtime/onert/core/include/backend/BackendContext.h
@@ -29,6 +29,7 @@ class Backend;
class IConstantInitializer;
class IKernelGenerator;
class ITensorRegister;
+struct ITensorRegistry;
struct ITensorBuilder;
struct IOptimizer;
@@ -45,14 +46,15 @@ public:
public:
BackendContext(const Backend *backend, const ir::Graph *graph,
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
std::shared_ptr<ITensorBuilder> tensor_builder = nullptr,
std::shared_ptr<IConstantInitializer> constant_initializer = nullptr,
std::shared_ptr<IKernelGenerator> kernel_gen = nullptr,
std::shared_ptr<ITensorRegister> tensor_register = nullptr,
std::shared_ptr<IOptimizer> optimizer = nullptr)
- : _backend{backend}, _graph{graph}, tensor_builder{tensor_builder},
- constant_initializer{constant_initializer}, kernel_gen{kernel_gen},
- tensor_register{tensor_register}, optimizer{optimizer}
+ : _backend{backend}, _graph{graph}, tensor_registry{tensor_registry},
+ tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
+ kernel_gen{kernel_gen}, tensor_register{tensor_register}, optimizer{optimizer}
{
}
@@ -74,6 +76,7 @@ private:
std::vector<ir::OperandIndex> _operand_list;
public:
+ std::shared_ptr<ITensorRegistry> tensor_registry;
std::shared_ptr<ITensorBuilder> tensor_builder;
std::shared_ptr<IConstantInitializer> constant_initializer;
std::shared_ptr<IKernelGenerator> kernel_gen;
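With the registry added as the first optional constructor argument, a backend is expected to hand its tensor registry to the context before the builder. A hedged usage sketch against the constructor above; the function and parameter names are placeholders, and the trailing components keep their nullptr defaults:

// Assumes the onert headers from this tree are available.
#include "backend/BackendContext.h"

#include <memory>

std::unique_ptr<onert::backend::BackendContext>
makeContext(const onert::backend::Backend *backend, const onert::ir::Graph *graph,
            std::shared_ptr<onert::backend::ITensorRegistry> reg,
            std::shared_ptr<onert::backend::ITensorBuilder> builder)
{
  // Registry now precedes the builder; constant initializer, kernel generator,
  // tensor register and optimizer keep their nullptr defaults.
  return std::make_unique<onert::backend::BackendContext>(backend, graph, reg, builder);
}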
diff --git a/runtime/onert/core/include/backend/IConstantInitializer.h b/runtime/onert/core/include/backend/IConstantInitializer.h
index f322015ba..149acecb4 100644
--- a/runtime/onert/core/include/backend/IConstantInitializer.h
+++ b/runtime/onert/core/include/backend/IConstantInitializer.h
@@ -162,14 +162,14 @@ public:
public:
void run()
{
- assert(tensor_builder().get());
+ assert(tensor_registry());
for (const auto &it : _init_map)
{
const auto &ind = it.first;
const auto &fn = it.second;
const auto &model_obj = _operands.at(ind);
- auto tensor_obj = tensor_builder()->tensorAt(ind);
+ auto tensor_obj = tensor_registry()->getNativeITensor(ind);
assert(tensor_obj != nullptr);
fn(model_obj, *tensor_obj);
VERBOSE(FillOperandData) << "Fill data for operand " << ind.value() << std::endl;
@@ -189,10 +189,7 @@ public:
void setLayout(ir::Layout layout) { _current_op_seq_layout = layout; }
protected:
- using OperationVisitor::visit;
-
-protected:
- virtual std::shared_ptr<ITensorBuilder> tensor_builder() const = 0;
+ virtual std::shared_ptr<ITensorRegistry> tensor_registry() const = 0;
public:
virtual void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj)
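Backends deriving from IConstantInitializer now satisfy run() by exposing their tensor registry through this pure virtual instead of a tensor builder. A hedged fragment of what such an override is assumed to look like; the _tensor_reg member name is illustrative:

// Inside a backend's ConstantInitializer class; `_tensor_reg` is an illustrative member
// holding that backend's tensor registry, which run() queries via getNativeITensor().
std::shared_ptr<onert::backend::ITensorRegistry> tensor_registry() const override
{
  return _tensor_reg;
}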
diff --git a/runtime/onert/core/include/backend/ITensorBuilder.h b/runtime/onert/core/include/backend/ITensorBuilder.h
index b760cda0e..f93ab81ae 100644
--- a/runtime/onert/core/include/backend/ITensorBuilder.h
+++ b/runtime/onert/core/include/backend/ITensorBuilder.h
@@ -40,11 +40,6 @@ struct ITensorBuilder
virtual ~ITensorBuilder(void) = default;
/**
- * @brief Returns true if this TensorBuilder support dynamic tensor
- */
- virtual bool supportDynamicTensor() = 0;
-
- /**
* @brief Register tensor information to allocate on backend
*
* @param ind Index
@@ -63,15 +58,6 @@ struct ITensorBuilder
*/
virtual bool isRegistered(const ir::OperandIndex &) const = 0;
- /**
- * @brief Get tensor registry
- *
- * @return std::shared_ptr<backend::ITensorRegistry> tensor registry object
- *
- * @note Backend should implement this when it has StaticTensorManager and DynamicTensorManager
- */
- virtual std::shared_ptr<backend::ITensorRegistry> tensorRegistry() = 0;
-
public: // methods for static tensor allocation
/**
* @brief Let the tensor builder know first use(start of lifetime) of a tensor
@@ -104,32 +90,6 @@ public: // methods for static tensor allocation
virtual void postFunctionPrepare() = 0;
/**
- * @brief Get the tensor object
- *
- * @param ind Index of the tensor
- * @return std::shared_ptr<ITensor> The tensor object
- */
- virtual std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) = 0;
-
- /**
- * @brief Set the migrant tensor object
- *
- * @return true if succeeded
- * @return false if failed or unsupported
- */
- virtual bool setMigrantTensor(const ir::OperandIndex &, const std::shared_ptr<IPortableTensor> &)
- {
- return false;
- }
-
- /**
- * @brief Iterate over tensors
- *
- * @param fn The function to be run
- */
- virtual void iterate(const IterateFunction &fn) = 0;
-
- /**
* @brief Release static @c ITensorManger object which was built
* Before calling this, @c allocate must have been called
*
@@ -147,10 +107,7 @@ public: // methods for dynamic tensor allocation
    * @note Since it is a pointer, its lifetime is from the creation of TensorBuilder
* to the end of execution
*/
- virtual IDynamicTensorManager *dynamicTensorManager(void)
- {
- throw std::runtime_error("dynamicTensorManager(): NYI");
- }
+ virtual IDynamicTensorManager *dynamicTensorManager(void) { return nullptr; }
/**
* @brief Release dynamic @c ITensorManger object which was built
@@ -158,10 +115,7 @@ public: // methods for dynamic tensor allocation
*
* @return std::unique_ptr<ITensorManager> Tensor Manager object
*/
- virtual std::unique_ptr<ITensorManager> releaseDynamicTensorManager(void)
- {
- throw std::runtime_error("releaseDynamicTensorManager() for this backend is not supported");
- }
+ virtual std::unique_ptr<ITensorManager> releaseDynamicTensorManager(void) { return nullptr; }
};
} // namespace backend
diff --git a/runtime/onert/core/include/backend/ITensorRegistry.h b/runtime/onert/core/include/backend/ITensorRegistry.h
index 855513124..88fcb0fcd 100644
--- a/runtime/onert/core/include/backend/ITensorRegistry.h
+++ b/runtime/onert/core/include/backend/ITensorRegistry.h
@@ -21,6 +21,7 @@
#include "ir/Index.h"
#include "backend/ITensor.h"
+#include "backend/IPortableTensor.h"
namespace onert
{
@@ -51,13 +52,22 @@ struct ITensorRegistry
* @note Returned tensor cannot be used longer than dynamic tensor manager
*/
virtual std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &) = 0;
+ /**
+   * @brief Set a migrant tensor, which comes from another backend
+ *
+ * @return true if supported
+ * @return false if not supported
+ */
+ virtual bool setMigrantTensor(const ir::OperandIndex &, const std::shared_ptr<IPortableTensor> &)
+ {
+ return false;
+ }
};
} // namespace backend
} // namespace onert
#include "ir/OperandIndexMap.h"
-#include "backend/IPortableTensor.h"
namespace onert
{
@@ -108,24 +118,23 @@ public:
return nullptr;
}
- bool setMigrantTensor(const ir::OperandIndex &ind, const std::shared_ptr<IPortableTensor> &tensor)
+ bool setMigrantTensor(const ir::OperandIndex &ind,
+ const std::shared_ptr<IPortableTensor> &tensor) override
{
- // TODO Uncomment this as two tensors for an index is not allowed.
- // But now it is temporarily allowed as a workaround. External one hides Managed one.
- // auto itr = _native.find(ind);
- // if (itr != _native.end() && itr->second != nullptr && tensor != nullptr)
- // throw std::runtime_error{
- // "Tried to set an migrant tensor but an native tensor already exists."};
+ assert(tensor != nullptr);
+ auto itr = _native.find(ind);
+ if (itr != _native.end())
+ throw std::runtime_error{"Tried to set a migrant tensor but a native tensor already exists."};
_migrant[ind] = tensor;
return true;
}
void setNativeTensor(const ir::OperandIndex &ind, const std::shared_ptr<T_Tensor> &tensor)
{
+ assert(tensor != nullptr);
auto itr = _migrant.find(ind);
- if (itr != _migrant.end() && itr->second != nullptr && tensor != nullptr)
- throw std::runtime_error{
- "Tried to set a native tensor but an migrant tensor already exists."};
+ if (itr != _migrant.end())
+ throw std::runtime_error{"Tried to set a native tensor but a migrant tensor already exists."};
_native[ind] = tensor;
}
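The reworked registry makes native and migrant registrations for an index mutually exclusive, instead of letting an external tensor silently hide a managed one. A simplified, standalone model of that invariant (not the actual template above; the tensor type is reduced to shared_ptr<int> for brevity):

#include <cassert>
#include <memory>
#include <stdexcept>
#include <unordered_map>

// Toy stand-in for the portable tensor registry: an index may hold either a native
// tensor or a migrant tensor, never both.
struct MiniRegistry
{
  std::unordered_map<int, std::shared_ptr<int>> native, migrant;

  void setNative(int ind, std::shared_ptr<int> tensor)
  {
    assert(tensor != nullptr);
    if (migrant.count(ind))
      throw std::runtime_error{"Tried to set a native tensor but a migrant tensor already exists."};
    native[ind] = std::move(tensor);
  }

  bool setMigrant(int ind, std::shared_ptr<int> tensor)
  {
    assert(tensor != nullptr);
    if (native.count(ind))
      throw std::runtime_error{"Tried to set a migrant tensor but a native tensor already exists."};
    migrant[ind] = std::move(tensor);
    return true;
  }
};

int main()
{
  MiniRegistry reg;
  reg.setNative(0, std::make_shared<int>(1));
  try
  {
    reg.setMigrant(0, std::make_shared<int>(2)); // now rejected instead of shadowing the native one
  }
  catch (const std::exception &)
  {
    // expected: index 0 already has a native tensor
  }
  return 0;
}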
diff --git a/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h b/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h
index a7e034a91..3f09b7a4a 100644
--- a/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h
+++ b/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h
@@ -20,6 +20,7 @@
#include "MemoryManager.h"
#include "backend/IStaticTensorManager.h"
+#include "backend/IDynamicTensorManager.h"
#include "ir/OperandIndexMap.h"
#include "ir/OperandInfo.h"
#include "TensorRegistry.h"
@@ -34,7 +35,8 @@ namespace cpu_common
class StaticTensorManager : public backend::IStaticTensorManager
{
public:
- StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg);
+ StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
+ IDynamicTensorManager *dynamic_tensor_manager);
virtual ~StaticTensorManager() = default;
void allocateConsts(void);
@@ -55,6 +57,7 @@ private:
std::unique_ptr<MemoryManager> _nonconst_mgr;
const std::shared_ptr<TensorRegistry> _tensors;
ir::OperandIndexMap<bool> _as_constants;
+ IDynamicTensorManager *_dynamic_tensor_manager;
};
} // namespace cpu_common
diff --git a/runtime/onert/core/include/compiler/LoweredGraph.h b/runtime/onert/core/include/compiler/LoweredGraph.h
new file mode 100644
index 000000000..aadba6857
--- /dev/null
+++ b/runtime/onert/core/include/compiler/LoweredGraph.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_LOWERED_GRAPH_H__
+#define __ONERT_IR_LOWERED_GRAPH_H__
+
+#include "ir/Graph.h"
+#include "ir/LowerInfoMap.h"
+#include "ir/OpSequences.h"
+#include "compiler/BackendResolver.h"
+#include "compiler/Compiler.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+/**
+ * @brief Class that contains lowering information on graph.
+ * In addition, after lowering, operands in graph will be set to "dynamic"
+ * if the shape of output of an operation cannot be decided at compilation time.
+ */
+class LoweredGraph
+{
+public:
+ LoweredGraph(const ir::Graph &graph, const compiler::CompilerOptions &options);
+
+ ir::Graph &graph() { return _graph; }
+ const ir::Graph &graph() const { return _graph; }
+ const ir::LowerInfoMap *getLowerInfo() const { return &_lower_info_map; }
+ const ir::operation::LowerInfo *getLowerInfo(const ir::OpSequenceIndex &op_seq_index) const;
+ void setLowerInfo(const ir::OpSequenceIndex &op_seq_index,
+ std::unique_ptr<ir::operation::LowerInfo> &&lower_info);
+ void removeLowerInfo(const ir::OpSequenceIndex &op_seq_index);
+ const ir::operand::LowerInfo *getLowerInfo(const ir::OperandIndex &index) const;
+ ir::operand::LowerInfo *getLowerInfo(const ir::OperandIndex &index);
+ void setLowerInfo(const ir::OperandIndex &index,
+ std::unique_ptr<ir::operand::LowerInfo> &&lower_info);
+ void removeLowerInfo(const ir::OperandIndex &index);
+ ir::OpSequences &op_seqs() { return _op_seqs; }
+ const ir::OpSequences &op_seqs() const { return _op_seqs; }
+ void iterateTopolOpSeqs(
+ const std::function<void(const ir::OpSequenceIndex &, const ir::OpSequence &)> &fn) const;
+ void
+ iterateTopolOpSeqs(const std::function<void(const ir::OpSequenceIndex &, ir::OpSequence &)> &fn);
+ const backend::BackendContexts &backend_contexts() { return _backend_contexts; }
+ const backend::BackendContexts &backend_contexts() const { return _backend_contexts; }
+ std::shared_ptr<ir::OperationIndexMap<int64_t>> indexed_ranks() { return _indexed_ranks; }
+
+private:
+ void
+ makeOpSequences(ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
+ const compiler::CompilerOptions &options,
+ const compiler::BackendResolver &backend_resolver);
+
+ void manipulateLowerInfo(
+ ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
+ bool is_primary);
+ void dumpLowerInfo();
+ bool mergeable(const ir::OpSequenceIndex &op_seq_index, const ir::OperationIndex &node_index,
+ ir::Layout layout, const compiler::BackendResolver &backend_resolver);
+ ir::OpSequenceIndex appendFreshSingleOpSequence(const ir::OperationIndex &node_index,
+ const ir::Operation &node);
+
+private:
+ ir::Graph _graph;
+ backend::BackendContexts _backend_contexts;
+ std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
+ ir::LowerInfoMap _lower_info_map;
+  // Pass (for Perm) accepts only a Graph, so Graph keeps OpSequences as a member
+ ir::OpSequences _op_seqs;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_IR_LOWERED_GRAPH_H__
diff --git a/runtime/onert/core/include/compiler/StaticShapeInference.h b/runtime/onert/core/include/compiler/StaticShapeInference.h
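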
index bff68c9fa..b97cb5b7b 100644
--- a/runtime/onert/core/include/compiler/StaticShapeInference.h
+++ b/runtime/onert/core/include/compiler/StaticShapeInference.h
@@ -19,7 +19,7 @@
#include "ir/OperationVisitor.h"
#include "ir/OpSequence.h"
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
#include "ir/Index.h"
#include <memory>
@@ -41,7 +41,8 @@ class StaticShapeInferer : public ir::OperationVisitor
public:
StaticShapeInferer(
const ir::SubgraphIndex &subg_idx,
- const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<ir::LoweredGraph>> &lowered_subgs)
+ const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>
+ &lowered_subgs)
: _lowered_subgs(lowered_subgs), _operands(lowered_subgs.at(subg_idx)->graph().operands()),
_operations(lowered_subgs.at(subg_idx)->graph().operations()),
_return_has_dynamic_tensor(false)
@@ -57,54 +58,34 @@ public:
* @param op_seq sequence of operations
* @return @c true if op_seq's input or output has any dynamic tensor; @c false otherwise.
*/
- bool infer(const ir::OpSequence &op_seq)
- {
- bool has_dynamic_tensor = false;
-
- _return_has_dynamic_tensor = false; // this is used as a return value inside operation's visit()
-
- for (const auto &operation_idx : op_seq.operations())
- {
- _operations.at(operation_idx).accept(*this);
-
- has_dynamic_tensor = has_dynamic_tensor || _return_has_dynamic_tensor;
- }
-
- return has_dynamic_tensor;
- }
+ bool infer(const ir::OpSequence &op_seq);
void dump();
private:
+ bool checkDynamicInput(const ir::Operation &op);
+ void setDynamicOutput(const ir::Operation &op);
+
+private:
// TODO Define visitors for operations. List them in alphabetic order.
- void visit(const ir::operation::Abs &op) override;
- void visit(const ir::operation::Add &op) override;
void visit(const ir::operation::ArgMax &op) override;
void visit(const ir::operation::BatchMatMul &op) override;
+ void visit(const ir::operation::BinaryArithmetic &op) override;
void visit(const ir::operation::BroadcastTo &op) override;
- void visit(const ir::operation::Cast &op) override;
void visit(const ir::operation::Comparison &op) override;
void visit(const ir::operation::Concat &op) override;
void visit(const ir::operation::Conv2D &op) override;
- void visit(const ir::operation::Cos &op) override;
- void visit(const ir::operation::Div &op) override;
- void visit(const ir::operation::Exp &op) override;
+ void visit(const ir::operation::ElementwiseActivation &op) override;
+ void visit(const ir::operation::ElementwiseBinary &op) override;
+ void visit(const ir::operation::ElementwiseUnary &op) override;
void visit(const ir::operation::ExpandDims &op) override;
void visit(const ir::operation::Fill &op) override;
void visit(const ir::operation::FullyConnected &op) override;
void visit(const ir::operation::FusedBatchNorm &op) override;
void visit(const ir::operation::Gather &op) override;
void visit(const ir::operation::If &op) override;
- void visit(const ir::operation::Log &op) override;
- void visit(const ir::operation::LogicalNot &op) override;
- void visit(const ir::operation::LogicalOr &op) override;
- void visit(const ir::operation::Logistic &op) override;
void visit(const ir::operation::L2Normalization &op) override;
void visit(const ir::operation::MatrixBandPart &op) override;
- void visit(const ir::operation::Max &op) override;
- void visit(const ir::operation::Min &op) override;
- void visit(const ir::operation::Mul &op) override;
- void visit(const ir::operation::Neg &op) override;
void visit(const ir::operation::OneHot &op) override;
void visit(const ir::operation::Pack &op) override;
void visit(const ir::operation::Pad &op) override;
@@ -113,27 +94,21 @@ private:
void visit(const ir::operation::Range &op) override;
void visit(const ir::operation::Reduce &op) override;
void visit(const ir::operation::Reshape &op) override;
- void visit(const ir::operation::Round &op) override;
- void visit(const ir::operation::RSQRT &op) override;
void visit(const ir::operation::ResizeBilinear &op) override;
void visit(const ir::operation::Reverse &op) override;
void visit(const ir::operation::Select &op) override;
void visit(const ir::operation::Shape &op) override;
- void visit(const ir::operation::Sin &op) override;
void visit(const ir::operation::Slice &op) override;
void visit(const ir::operation::Softmax &op) override;
void visit(const ir::operation::SpaceToBatchND &op) override;
void visit(const ir::operation::Split &op) override;
void visit(const ir::operation::Squeeze &op) override;
void visit(const ir::operation::StridedSlice &op) override;
- void visit(const ir::operation::Sub &op) override;
void visit(const ir::operation::SquaredDifference &op) override;
- void visit(const ir::operation::Tanh &op) override;
void visit(const ir::operation::Tile &op) override;
void visit(const ir::operation::Transpose &op) override;
void visit(const ir::operation::Unpack &op) override;
void visit(const ir::operation::While &op) override;
- void visit(const ir::operation::ZerosLike &op) override;
private:
/**
@@ -149,7 +124,8 @@ private:
void handleSimpleUnaryOp(const ir::Operation &op, const ir::OperandIndex input_idx);
private:
- const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<ir::LoweredGraph>> &_lowered_subgs;
+ const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>
+ &_lowered_subgs;
// _operands and _operations can be changed by controlflow operation
ir::Operands &_operands; // operands of current subgraph
ir::Operations &_operations; // operations of current subgraph
diff --git a/runtime/onert/core/include/exec/DynamicShapeInference.h b/runtime/onert/core/include/exec/DynamicShapeInference.h
index bca80db09..6f6659659 100644
--- a/runtime/onert/core/include/exec/DynamicShapeInference.h
+++ b/runtime/onert/core/include/exec/DynamicShapeInference.h
@@ -38,46 +38,34 @@ namespace exec
class DynamicShapeInferer : public ir::OperationVisitor
{
public:
- DynamicShapeInferer(const ir::Operands &operands, backend::IDynamicTensorManager *tensor_manager,
+ DynamicShapeInferer(const ir::Operands &operands,
const std::shared_ptr<backend::ITensorRegistry> &tensor_registry)
- : _operands(operands), _dynamic_tensor_manager(tensor_manager),
- _tensor_registry(tensor_registry)
+ : _operands(operands), _tensor_registry(tensor_registry)
{
UNUSED_RELEASE(_operands);
- UNUSED_RELEASE(_dynamic_tensor_manager);
UNUSED_RELEASE(_tensor_registry);
}
public:
// TODO Define visitors for operations. List them in alphabetic order.
// Remove TODO when any op starting from the alphabet is added
- void visit(const ir::operation::Abs &op) override;
- void visit(const ir::operation::Add &op) override;
void visit(const ir::operation::ArgMax &op) override;
void visit(const ir::operation::BatchMatMul &op) override;
+ void visit(const ir::operation::BinaryArithmetic &op) override;
void visit(const ir::operation::BroadcastTo &op) override;
- void visit(const ir::operation::Cast &op) override;
void visit(const ir::operation::Comparison &op) override;
void visit(const ir::operation::Concat &op) override;
void visit(const ir::operation::Conv2D &op) override;
- void visit(const ir::operation::Cos &op) override;
- void visit(const ir::operation::Div &op) override;
- void visit(const ir::operation::Exp &op) override;
+ void visit(const ir::operation::ElementwiseActivation &op) override;
+ void visit(const ir::operation::ElementwiseBinary &op) override;
+ void visit(const ir::operation::ElementwiseUnary &op) override;
void visit(const ir::operation::ExpandDims &op) override;
void visit(const ir::operation::Fill &op) override;
void visit(const ir::operation::FullyConnected &op) override;
void visit(const ir::operation::FusedBatchNorm &op) override;
void visit(const ir::operation::Gather &op) override;
- void visit(const ir::operation::Log &op) override;
- void visit(const ir::operation::LogicalNot &op) override;
- void visit(const ir::operation::LogicalOr &op) override;
- void visit(const ir::operation::Logistic &op) override;
void visit(const ir::operation::L2Normalization &op) override;
void visit(const ir::operation::MatrixBandPart &op) override;
- void visit(const ir::operation::Max &op) override;
- void visit(const ir::operation::Min &op) override;
- void visit(const ir::operation::Mul &op) override;
- void visit(const ir::operation::Neg &op) override;
void visit(const ir::operation::OneHot &op) override;
void visit(const ir::operation::Pack &op) override;
void visit(const ir::operation::Pad &op) override;
@@ -87,27 +75,21 @@ public:
void visit(const ir::operation::Range &op) override;
void visit(const ir::operation::Reduce &op) override;
void visit(const ir::operation::Reshape &op) override;
- void visit(const ir::operation::Round &op) override;
- void visit(const ir::operation::RSQRT &op) override;
void visit(const ir::operation::ResizeBilinear &op) override;
void visit(const ir::operation::Reverse &op) override;
void visit(const ir::operation::Select &op) override;
void visit(const ir::operation::Shape &op) override;
- void visit(const ir::operation::Sin &op) override;
void visit(const ir::operation::Slice &op) override;
void visit(const ir::operation::Softmax &op) override;
void visit(const ir::operation::SpaceToBatchND &op) override;
void visit(const ir::operation::Split &op) override;
void visit(const ir::operation::Squeeze &op) override;
void visit(const ir::operation::StridedSlice &op) override;
- void visit(const ir::operation::Sub &op) override;
void visit(const ir::operation::SquaredDifference &op) override;
- void visit(const ir::operation::Tanh &op) override;
void visit(const ir::operation::Tile &op) override;
void visit(const ir::operation::Transpose &op) override;
void visit(const ir::operation::Unpack &op) override;
// TODO write op starting from V
- void visit(const ir::operation::ZerosLike &op) override;
private:
/**
@@ -127,11 +109,6 @@ private:
*/
const ir::Operands &_operands;
/**
- * @brief To allocate memory for output tensor if needed
- */
- // TODO Remove this, as it is no longer used
- backend::IDynamicTensorManager *_dynamic_tensor_manager;
- /**
* @brief To get tensor object and access tensor-level info, e.g., ITensor::buffer()
*/
std::shared_ptr<backend::ITensorRegistry> _tensor_registry;
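With the manager argument dropped, a dynamic shape inferer is built from the operands and a tensor registry only. A hedged usage sketch against the constructor above; the function and parameter names are placeholders:

// Assumes the onert headers from this tree are available.
#include "exec/DynamicShapeInference.h"
#include "ir/Graph.h"

#include <memory>

void reinferShapes(const onert::ir::Graph &graph,
                   const std::shared_ptr<onert::backend::ITensorRegistry> &tensor_registry,
                   const onert::ir::Operation &op)
{
  // Only operands and the registry are wired in; output allocation is handled elsewhere.
  onert::exec::DynamicShapeInferer inferer{graph.operands(), tensor_registry};
  op.accept(inferer); // re-infers this operation's output shapes at execution time
}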
diff --git a/runtime/onert/core/include/exec/IExecutor.h b/runtime/onert/core/include/exec/IExecutor.h
index 46e05a289..6c8bab67c 100644
--- a/runtime/onert/core/include/exec/IExecutor.h
+++ b/runtime/onert/core/include/exec/IExecutor.h
@@ -80,8 +80,6 @@ struct DynAllocInfo
{
/// @brief index of input tensor whose memory needs to be allocated at execution time
ir::OperandIndex ind;
- /// @brief dynamic tensor manager that can allocate memory when input tensor is dynamic
- backend::IDynamicTensorManager *dyn_tensor_manager;
};
using DynAllocInfoMap = std::unordered_map<std::shared_ptr<backend::ITensor>, DynAllocInfo>;
diff --git a/runtime/onert/core/include/exec/IODescription.h b/runtime/onert/core/include/exec/IODescription.h
index c10c36756..d1810ec3b 100644
--- a/runtime/onert/core/include/exec/IODescription.h
+++ b/runtime/onert/core/include/exec/IODescription.h
@@ -62,8 +62,8 @@ struct IODescription
{
std::vector<std::unique_ptr<InputDesc>> inputs;
std::vector<std::unique_ptr<OutputDesc>> outputs;
- // Contains shape of input set by set_input_tensorinfo
- std::unordered_map<ir::IOIndex, ir::Shape> input_shape_signature;
+ // Contains shape of input set by nnfw_set_input_tensorinfo(..)
+ std::unordered_map<ir::IOIndex, ir::Shape> dynamic_input_shapes;
};
} // namespace exec
diff --git a/runtime/onert/core/include/ir/Graph.h b/runtime/onert/core/include/ir/Graph.h
index fb956fedf..2103e6e64 100644
--- a/runtime/onert/core/include/ir/Graph.h
+++ b/runtime/onert/core/include/ir/Graph.h
@@ -60,8 +60,8 @@ public:
OperandIndex addOperand(const Shape &shape, const TypeInfo &type);
OperationIndex addOperation(std::unique_ptr<Operation> &&node);
void setOperandValue(const OperandIndex &ind, std::shared_ptr<Data> data);
- void addInput(const OperandIndex &ind);
- void addOutput(const OperandIndex &ind);
+ void addInput(const OperandIndex &ind, const std::string &name = "");
+ void addOutput(const OperandIndex &ind, const std::string &name = "");
void finishBuilding(void);
void removeOperand(const OperandIndex &ind) { _operands.remove(ind); }
bool isBuildingPhase(void) const { return _phase == Phase::BUILDING; }
@@ -94,6 +94,8 @@ public:
OperandIndexSequence &getInputs() { return _inputs; }
const OperandIndexSequence &getOutputs() const { return _outputs; }
OperandIndexSequence &getOutputs() { return _outputs; }
+ IOIndex getInputIndex(const std::string &name) const;
+ IOIndex getOutputIndex(const std::string &name) const;
const Operands &operands() const { return _operands; }
Operands &operands() { return _operands; } // TODO Remove this non-const accessor
const Operations &operations() const { return _operations; }
@@ -108,6 +110,8 @@ private:
Operands _operands;
OperandIndexSequence _inputs;
OperandIndexSequence _outputs;
+ std::unordered_map<std::string, IOIndex> _name_to_input;
+ std::unordered_map<std::string, IOIndex> _name_to_output;
// Child subgraphs
std::shared_ptr<Subgraphs> _subgraphs;
// TFLite and circle's default layout is NHWC;
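Graph inputs and outputs can now carry a name at registration time and be looked up by that name. A hedged usage sketch against the signatures above; the operand indices and names are placeholders:

// Assumes the onert headers from this tree are available.
#include "ir/Graph.h"

void registerNamedIO(onert::ir::Graph &graph, const onert::ir::OperandIndex &in,
                     const onert::ir::OperandIndex &out)
{
  graph.addInput(in, "image");    // the name argument is optional and defaults to ""
  graph.addOutput(out, "logits");

  // Named models can later be addressed by name instead of positional IO index.
  const onert::ir::IOIndex in_idx = graph.getInputIndex("image");
  const onert::ir::IOIndex out_idx = graph.getOutputIndex("logits");
  (void)in_idx;
  (void)out_idx;
}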
diff --git a/runtime/onert/core/include/ir/InternalType.h b/runtime/onert/core/include/ir/InternalType.h
index e42db72cf..1d962c185 100644
--- a/runtime/onert/core/include/ir/InternalType.h
+++ b/runtime/onert/core/include/ir/InternalType.h
@@ -40,6 +40,12 @@ struct Stride
uint32_t horizontal;
};
+struct Dilation
+{
+ uint32_t width_factor;
+ uint32_t height_factor;
+};
+
} // namespace ir
} // namespace onert
diff --git a/runtime/onert/core/include/ir/LoweredGraph.h b/runtime/onert/core/include/ir/LoweredGraph.h
deleted file mode 100644
index d6583df24..000000000
--- a/runtime/onert/core/include/ir/LoweredGraph.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_LOWERED_GRAPH_H__
-#define __ONERT_IR_LOWERED_GRAPH_H__
-
-#include "ir/Graph.h"
-#include "ir/LowerInfoMap.h"
-#include "ir/OpSequences.h"
-#include "compiler/BackendResolver.h"
-#include "compiler/Compiler.h"
-
-namespace onert
-{
-namespace ir
-{
-
-/**
- * @brief Class that contains lowering information on graph.
- * In addition, after lowering, operands in graph will be set to "dynamic"
- * if the shape of output of an operation cannot be decided at compilation time.
- */
-class LoweredGraph
-{
-public:
- LoweredGraph(const Graph &graph, const compiler::CompilerOptions &options);
-
- Graph &graph() { return _graph; }
- const Graph &graph() const { return _graph; }
- const LowerInfoMap *getLowerInfo() const { return &_lower_info_map; }
- const operation::LowerInfo *getLowerInfo(const OpSequenceIndex &op_seq_index) const;
- void setLowerInfo(const OpSequenceIndex &op_seq_index,
- std::unique_ptr<operation::LowerInfo> &&lower_info);
- void removeLowerInfo(const OpSequenceIndex &op_seq_index);
- const operand::LowerInfo *getLowerInfo(const OperandIndex &index) const;
- operand::LowerInfo *getLowerInfo(const OperandIndex &index);
- void setLowerInfo(const OperandIndex &index, std::unique_ptr<operand::LowerInfo> &&lower_info);
- void removeLowerInfo(const OperandIndex &index);
- OpSequences &op_seqs() { return _op_seqs; }
- const OpSequences &op_seqs() const { return _op_seqs; }
- void iterateTopolOpSeqs(
- const std::function<void(const OpSequenceIndex &, const OpSequence &)> &fn) const;
- void iterateTopolOpSeqs(const std::function<void(const OpSequenceIndex &, OpSequence &)> &fn);
- const backend::BackendContexts &backend_contexts() { return _backend_contexts; }
- const backend::BackendContexts &backend_contexts() const { return _backend_contexts; }
- std::shared_ptr<ir::OperationIndexMap<int64_t>> indexed_ranks() { return _indexed_ranks; }
-
-private:
- void makeOpSequences(OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info,
- const compiler::CompilerOptions &options,
- const compiler::BackendResolver &backend_resolver);
-
- void
- manipulateLowerInfo(OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info,
- bool is_primary);
- void dumpLowerInfo();
- bool mergeable(const OpSequenceIndex &op_seq_index, const OperationIndex &node_index,
- Layout layout, const compiler::BackendResolver &backend_resolver);
- OpSequenceIndex appendFreshSingleOpSequence(const OperationIndex &node_index,
- const Operation &node);
-
-private:
- Graph _graph;
- backend::BackendContexts _backend_contexts;
- std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
- LowerInfoMap _lower_info_map;
- // Pass(for Perm) can accept only graph so that Graph has OpSequences as a member
- OpSequences _op_seqs;
-};
-
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_LOWERED_GRAPH_H__
diff --git a/runtime/onert/core/include/ir/OpSequences.h b/runtime/onert/core/include/ir/OpSequences.h
index 6ed8499bc..ab258f395 100644
--- a/runtime/onert/core/include/ir/OpSequences.h
+++ b/runtime/onert/core/include/ir/OpSequences.h
@@ -63,13 +63,6 @@ public:
*/
OpSequenceIndex getOperation(const OperationIndex &operation_index) const;
/**
- * @brief Dump OpSequences
- *
- * @param msg Message that will be displayed
- * @param graph Graph that has information used for dump
- */
- void dump(const std::string &msg, const Operations &operations) const;
- /**
* @brief Remove an operation from OpSequence
*
* @param operation_index Operation index to be removed
@@ -84,6 +77,14 @@ private:
mutable std::unordered_map<OperationIndex, OpSequenceIndex> _seq_indexes;
};
+/**
+ * @brief Dump OpSequences
+ *
+ * @param op_seqs Operation Sequences
+ * @param operations Operation context
+ */
+void dumpOpSequences(const OpSequences &op_seqs, const Operations &operations);
+
} // namespace ir
} // namespace onert
diff --git a/runtime/onert/core/include/ir/Operations.Include.h b/runtime/onert/core/include/ir/Operations.Include.h
index 30c4ff25a..17bbbc29c 100644
--- a/runtime/onert/core/include/ir/Operations.Include.h
+++ b/runtime/onert/core/include/ir/Operations.Include.h
@@ -17,10 +17,10 @@
// This file has no ifdef guard intentionally
#include "ir/operation/BatchToSpaceND.h"
+#include "ir/operation/BinaryArithmetic.h"
#include "ir/operation/BroadcastTo.h"
#include "ir/operation/Conv2D.h"
-#include "ir/operation/MaxPool2D.h"
-#include "ir/operation/AvgPool2D.h"
+#include "ir/operation/Pool2D.h"
#include "ir/operation/Concat.h"
#include "ir/operation/Reshape.h"
#include "ir/operation/Fill.h"
@@ -29,51 +29,32 @@
#include "ir/operation/Transpose.h"
#include "ir/operation/Permute.h"
#include "ir/operation/Reduce.h"
-#include "ir/operation/Add.h"
-#include "ir/operation/Sub.h"
#include "ir/operation/DepthwiseConv2D.h"
#include "ir/operation/Slice.h"
#include "ir/operation/StridedSlice.h"
-#include "ir/operation/Mul.h"
#include "ir/operation/Squeeze.h"
-#include "ir/operation/Tanh.h"
-#include "ir/operation/Log.h"
-#include "ir/operation/Logistic.h"
-#include "ir/operation/Cast.h"
-#include "ir/operation/Div.h"
-#include "ir/operation/Exp.h"
+#include "ir/operation/ElementwiseActivation.h"
+#include "ir/operation/ElementwiseBinary.h"
+#include "ir/operation/ElementwiseUnary.h"
#include "ir/operation/ExpandDims.h"
#include "ir/operation/Comparison.h"
-#include "ir/operation/LogicalAnd.h"
-#include "ir/operation/LogicalOr.h"
-#include "ir/operation/LogicalNot.h"
#include "ir/operation/LSTM.h"
-#include "ir/operation/RSQRT.h"
-#include "ir/operation/ReLU.h"
#include "ir/operation/ResizeBilinear.h"
-#include "ir/operation/ReLU1.h"
-#include "ir/operation/ReLU6.h"
+#include "ir/operation/ResizeNearestNeighbor.h"
#include "ir/operation/Reverse.h"
#include "ir/operation/RNN.h"
-#include "ir/operation/Round.h"
-#include "ir/operation/Floor.h"
#include "ir/operation/SpaceToBatchND.h"
#include "ir/operation/SpaceToDepth.h"
-#include "ir/operation/L2Pool2D.h"
#include "ir/operation/EmbeddingLookup.h"
#include "ir/operation/L2Normalization.h"
#include "ir/operation/HashtableLookup.h"
#include "ir/operation/InstanceNorm.h"
#include "ir/operation/PReLU.h"
#include "ir/operation/TransposeConv.h"
-#include "ir/operation/SQRT.h"
#include "ir/operation/SquaredDifference.h"
#include "ir/operation/TopKV2.h"
#include "ir/operation/Gather.h"
-#include "ir/operation/Neg.h"
-#include "ir/operation/Abs.h"
#include "ir/operation/ArgMax.h"
-#include "ir/operation/Dequantize.h"
#include "ir/operation/LocalResponseNormalization.h"
#include "ir/operation/DepthToSpace.h"
#include "ir/operation/Pack.h"
@@ -82,27 +63,22 @@
#include "ir/operation/SplitV.h"
#include "ir/operation/Unpack.h"
#include "ir/operation/Pad.h"
-#include "ir/operation/Min.h"
-#include "ir/operation/Max.h"
#include "ir/operation/Custom.h"
#include "ir/operation/Einsum.h"
#include "ir/operation/OneHot.h"
-#include "ir/operation/Cos.h"
-#include "ir/operation/Sin.h"
#include "ir/operation/Shape.h"
#include "ir/operation/ConvertFp32ToFp16.h"
#include "ir/operation/ConvertFp16ToFp32.h"
#include "ir/operation/If.h"
#include "ir/operation/While.h"
#include "ir/operation/Pow.h"
-#include "ir/operation/ZerosLike.h"
#include "ir/operation/Tile.h"
#include "ir/operation/Range.h"
+#include "ir/operation/Rank.h"
#include "ir/operation/BCQFullyConnected.h"
#include "ir/operation/BCQGather.h"
#include "ir/operation/MatrixBandPart.h"
#include "ir/operation/BatchMatMul.h"
#include "ir/operation/FusedBatchNorm.h"
#include "ir/operation/LogSoftmax.h"
-#include "ir/operation/Quantize.h"
#include "ir/operation/StatelessRandomUniform.h"
diff --git a/runtime/onert/core/include/ir/Operations.lst b/runtime/onert/core/include/ir/Operations.lst
index 75c6d8221..ab2146821 100644
--- a/runtime/onert/core/include/ir/Operations.lst
+++ b/runtime/onert/core/include/ir/Operations.lst
@@ -19,62 +19,44 @@
#endif
// Internal Name
-OP(Add)
-OP(Sub)
OP(BatchToSpaceND)
+OP(BinaryArithmetic)
OP(BroadcastTo)
-OP(Cast)
OP(Conv2D)
OP(DepthwiseConv2D)
-OP(AvgPool2D)
-OP(MaxPool2D)
+OP(Pool2D)
OP(Concat)
OP(Fill)
OP(FullyConnected)
OP(Reduce)
OP(Reshape)
-OP(Mul)
OP(Softmax)
OP(Squeeze)
OP(Slice)
OP(StridedSlice)
-OP(Tanh)
-OP(Logistic)
-OP(Div)
OP(Transpose)
-OP(Exp)
+OP(ElementwiseActivation)
+OP(ElementwiseBinary)
+OP(ElementwiseUnary)
OP(ExpandDims)
OP(Comparison)
-OP(LogicalAnd)
-OP(LogicalOr)
-OP(LogicalNot)
OP(LSTM)
-OP(RSQRT)
-OP(ReLU)
OP(ResizeBilinear)
-OP(ReLU1)
-OP(ReLU6)
+OP(ResizeNearestNeighbor)
OP(Reverse)
OP(RNN)
-OP(Round)
-OP(Floor)
OP(SpaceToBatchND)
OP(SpaceToDepth)
-OP(L2Pool2D)
OP(EmbeddingLookup)
OP(L2Normalization)
OP(HashtableLookup)
OP(InstanceNorm)
OP(PReLU)
OP(TransposeConv)
-OP(SQRT)
OP(SquaredDifference)
OP(TopKV2)
OP(Gather)
-OP(Neg)
-OP(Abs)
OP(ArgMax)
-OP(Dequantize)
OP(Einsum)
OP(LocalResponseNormalization)
OP(DepthToSpace)
@@ -86,26 +68,20 @@ OP(Unpack)
OP(Pad)
OP(Custom)
OP(Permute)
-OP(Min)
-OP(Max)
OP(OneHot)
-OP(Cos)
-OP(Sin)
OP(Shape)
OP(ConvertFp32ToFp16)
OP(ConvertFp16ToFp32)
OP(If)
OP(While)
-OP(Log)
OP(Pow)
-OP(ZerosLike)
OP(Tile)
OP(Range)
+OP(Rank)
OP(BCQFullyConnected)
OP(BCQGather)
OP(MatrixBandPart)
OP(BatchMatMul)
OP(FusedBatchNorm)
OP(LogSoftmax)
-OP(Quantize)
OP(StatelessRandomUniform)
diff --git a/runtime/onert/core/include/ir/Padding.h b/runtime/onert/core/include/ir/Padding.h
index b9053914d..8a7bcdbeb 100644
--- a/runtime/onert/core/include/ir/Padding.h
+++ b/runtime/onert/core/include/ir/Padding.h
@@ -65,7 +65,8 @@ struct Padding
// TODO Change to Padding struct's method
const ExplicitPadding calculatePadding(const Padding &padding, const FeatureShape &ifm_shape,
const FeatureShape &ofm_shape, const Stride &stride,
- uint32_t kw, uint32_t kh);
+ uint32_t kw, uint32_t kh, uint32_t dwf = 1,
+ uint32_t dhf = 1);
} // namespace ir
} // namespace onert
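calculatePadding now receives width/height dilation factors (matching the Dilation struct added to InternalType.h earlier in this diff). With dilation, a kernel of size k covers (k - 1) * d + 1 input elements, which is the conventional quantity SAME padding is computed from. A standalone sketch of that computation; it is the textbook formula, not necessarily onert's exact code:

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Total SAME padding along one axis for a dilated kernel: the dilated kernel has an
// effective extent of (k - 1) * dilation + 1.
uint32_t samePadding(uint32_t in, uint32_t out, uint32_t stride, uint32_t k, uint32_t dilation)
{
  const uint32_t effective_k = (k - 1) * dilation + 1;
  const int64_t needed = static_cast<int64_t>(out - 1) * stride + effective_k - in;
  return static_cast<uint32_t>(std::max<int64_t>(0, needed));
}

int main()
{
  // A 3x3 kernel with dilation 2 pads like a 5x5 kernel.
  std::printf("total pad = %u\n", samePadding(/*in=*/32, /*out=*/32, /*stride=*/1, /*k=*/3,
                                              /*dilation=*/2)); // prints 4
  return 0;
}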
diff --git a/runtime/onert/core/include/ir/operation/Abs.h b/runtime/onert/core/include/ir/operation/Abs.h
deleted file mode 100644
index 9126c0027..000000000
--- a/runtime/onert/core/include/ir/operation/Abs.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_ABS_H__
-#define __ONERT_IR_OPERATION_ABS_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Abs : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Abs(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Abs; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_ABS_H__
diff --git a/runtime/onert/core/include/ir/operation/Add.h b/runtime/onert/core/include/ir/operation/BinaryArithmetic.h
index 5f5f4e0fe..110fff565 100644
--- a/runtime/onert/core/include/ir/operation/Add.h
+++ b/runtime/onert/core/include/ir/operation/BinaryArithmetic.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_IR_OPERATION_ADD_H__
-#define __ONERT_IR_OPERATION_ADD_H__
+#ifndef __ONERT_IR_OPERATION_BINARY_ARITHMETIC_H__
+#define __ONERT_IR_OPERATION_BINARY_ARITHMETIC_H__
#include "ir/Operation.h"
#include "ir/InternalType.h"
@@ -27,7 +27,7 @@ namespace ir
namespace operation
{
-class Add : public Operation
+class BinaryArithmetic final : public Operation
{
public:
enum Input
@@ -36,17 +36,28 @@ public:
RHS
};
+ enum class ArithmeticType
+ {
+ ADD,
+ SUB,
+ MUL,
+ DIV
+ };
+
struct Param
{
+ ArithmeticType arithmetic_type;
Activation activation;
};
public:
- Add(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
+ BinaryArithmetic(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
public:
void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Add; }
+ std::string name() const override;
+ OpCode opcode() const final { return OpCode::BinaryArithmetic; }
public:
const Param &param() const { return _param; }
@@ -59,4 +70,4 @@ private:
} // namespace ir
} // namespace onert
-#endif // __ONERT_IR_OPERATION_ADD_H__
+#endif // __ONERT_IR_OPERATION_BINARY_ARITHMETIC_H__
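
Add, Sub, Mul and Div now differ only in Param::arithmetic_type, so a backend can select a kernel from the parameter instead of implementing four visitor overloads. A small standalone dispatch sketch in that spirit (simplified scalar kernels, not the onert classes):

    #include <functional>
    #include <iostream>

    enum class ArithmeticType { ADD, SUB, MUL, DIV };

    // Pick the scalar kernel once, from the parameter, instead of having
    // one operation class (and one visitor overload) per arithmetic op.
    std::function<float(float, float)> selectKernel(ArithmeticType t)
    {
      switch (t)
      {
        case ArithmeticType::ADD: return [](float a, float b) { return a + b; };
        case ArithmeticType::SUB: return [](float a, float b) { return a - b; };
        case ArithmeticType::MUL: return [](float a, float b) { return a * b; };
        case ArithmeticType::DIV: return [](float a, float b) { return a / b; };
      }
      return {};
    }

    int main()
    {
      auto mul = selectKernel(ArithmeticType::MUL);
      std::cout << mul(3.0f, 4.0f) << '\n'; // 12
    }
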
diff --git a/runtime/onert/core/include/ir/operation/BroadcastTo.h b/runtime/onert/core/include/ir/operation/BroadcastTo.h
index 98906adc2..06c033497 100644
--- a/runtime/onert/core/include/ir/operation/BroadcastTo.h
+++ b/runtime/onert/core/include/ir/operation/BroadcastTo.h
@@ -42,7 +42,7 @@ public:
public:
void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Cast; }
+ OpCode opcode() const final { return OpCode::BroadcastTo; }
};
} // namespace operation
diff --git a/runtime/onert/core/include/ir/operation/Cast.h b/runtime/onert/core/include/ir/operation/Cast.h
deleted file mode 100644
index 6fb8c105b..000000000
--- a/runtime/onert/core/include/ir/operation/Cast.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_CAST_H__
-#define __ONERT_IR_OPERATION_CAST_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Cast : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Cast(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Cast; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_CAST_H__
diff --git a/runtime/onert/core/include/ir/operation/Conv2D.h b/runtime/onert/core/include/ir/operation/Conv2D.h
index e23bf3eb3..d8c7b671b 100644
--- a/runtime/onert/core/include/ir/operation/Conv2D.h
+++ b/runtime/onert/core/include/ir/operation/Conv2D.h
@@ -45,6 +45,7 @@ public:
Stride stride;
Padding padding;
Activation activation;
+ Dilation dilation;
};
public:
diff --git a/runtime/onert/core/include/ir/operation/Cos.h b/runtime/onert/core/include/ir/operation/Cos.h
deleted file mode 100644
index a6d7851bd..000000000
--- a/runtime/onert/core/include/ir/operation/Cos.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_COS_H__
-#define __ONERT_IR_OPERATION_COS_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Cos : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Cos(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Cos; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_COS_H__
diff --git a/runtime/onert/core/include/ir/operation/Dequantize.h b/runtime/onert/core/include/ir/operation/Dequantize.h
deleted file mode 100644
index 97a08b33c..000000000
--- a/runtime/onert/core/include/ir/operation/Dequantize.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_DEQUANTIZE_H__
-#define __ONERT_IR_OPERATION_DEQUANTIZE_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Dequantize : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Dequantize(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Dequantize; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_DEQUANTIZE_H__
diff --git a/runtime/onert/core/include/ir/operation/Einsum.h b/runtime/onert/core/include/ir/operation/Einsum.h
index a3426ccbc..9892c24b8 100644
--- a/runtime/onert/core/include/ir/operation/Einsum.h
+++ b/runtime/onert/core/include/ir/operation/Einsum.h
@@ -41,7 +41,7 @@ public:
public:
void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Add; }
+ OpCode opcode() const final { return OpCode::Einsum; }
public:
const Param &param() const { return _param; }
diff --git a/runtime/onert/core/include/ir/operation/Div.h b/runtime/onert/core/include/ir/operation/ElementwiseActivation.h
index a7ec1c465..b2a1d3d2d 100644
--- a/runtime/onert/core/include/ir/operation/Div.h
+++ b/runtime/onert/core/include/ir/operation/ElementwiseActivation.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,11 +14,10 @@
* limitations under the License.
*/
-#ifndef __ONERT_IR_OPERATION_DIV_H__
-#define __ONERT_IR_OPERATION_DIV_H__
+#ifndef __ONERT_IR_OPERATION_ELEMENTWISE_ACTIVATION_H__
+#define __ONERT_IR_OPERATION_ELEMENTWISE_ACTIVATION_H__
#include "ir/Operation.h"
-#include "ir/InternalType.h"
namespace onert
{
@@ -27,30 +26,46 @@ namespace ir
namespace operation
{
-class Div : public Operation
+class ElementwiseActivation : public Operation
{
public:
enum Input
{
- LHS = 0,
- RHS
+ INPUT = 0
+ };
+
+ enum class Type
+ {
+ ELU,
+ LOGISTIC,
+ RELU,
+ TANH,
+ LEAKY_RELU
};
struct Param
{
- Activation activation;
+ Type op_type;
+ float alpha;
+ float beta;
+ Param() : op_type(Type::ELU), alpha(0.0f), beta(0.0f) {}
};
public:
- Div(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
+ ElementwiseActivation(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
public:
void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Div; }
+ std::string name() const override;
+ OpCode opcode() const final { return OpCode::ElementwiseActivation; }
public:
const Param &param() const { return _param; }
+public:
+ static float infinity;
+
private:
Param _param;
};
@@ -59,4 +74,4 @@ private:
} // namespace ir
} // namespace onert
-#endif // __ONERT_IR_OPERATION_DIV_H__
+#endif // __ONERT_IR_OPERATION_ELEMENTWISE_ACTIVATION_H__
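
The removed ReLU/ReLU1/ReLU6/Tanh/Logistic nodes are all representable as one node type plus the two scalars. One plausible reading, suggested by the static `infinity` member, is that for RELU alpha acts as the upper bound and beta as the lower bound, so alpha = infinity, beta = 0 reproduces plain ReLU and alpha = 6 reproduces ReLU6; that convention is an assumption here and is ultimately fixed by the frontend loaders. A standalone sketch of the clamped form under that assumption:

    #include <algorithm>
    #include <iostream>
    #include <limits>

    // Clamped ReLU, assuming alpha = upper bound and beta = lower bound.
    float clampedRelu(float x, float alpha, float beta)
    {
      return std::min(alpha, std::max(beta, x));
    }

    int main()
    {
      const float inf = std::numeric_limits<float>::infinity();
      std::cout << clampedRelu(10.f, inf, 0.f) << ' '    // 10 -> plain ReLU
                << clampedRelu(10.f, 6.f, 0.f) << '\n';  // 6  -> ReLU6 behavior
    }
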
diff --git a/runtime/onert/core/include/ir/operation/Mul.h b/runtime/onert/core/include/ir/operation/ElementwiseBinary.h
index 0f01b0ecf..dd07f6058 100644
--- a/runtime/onert/core/include/ir/operation/Mul.h
+++ b/runtime/onert/core/include/ir/operation/ElementwiseBinary.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,11 +14,10 @@
* limitations under the License.
*/
-#ifndef __ONERT_IR_OPERATION_MUL_H__
-#define __ONERT_IR_OPERATION_MUL_H__
+#ifndef __ONERT_IR_OPERATION_ELEMENTWISEBINARY_H__
+#define __ONERT_IR_OPERATION_ELEMENTWISEBINARY_H__
#include "ir/Operation.h"
-#include "ir/InternalType.h"
namespace onert
{
@@ -27,7 +26,7 @@ namespace ir
namespace operation
{
-class Mul : public Operation
+class ElementwiseBinary : public Operation
{
public:
enum Input
@@ -36,17 +35,27 @@ public:
RHS
};
+ enum class ElementwiseBinaryType
+ {
+ LOGICAL_AND,
+ LOGICAL_OR,
+ MAX,
+ MIN
+ };
+
struct Param
{
- Activation activation;
+ ElementwiseBinaryType op_type;
};
public:
- Mul(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
+ ElementwiseBinary(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
public:
void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Mul; }
+ std::string name() const override;
+ OpCode opcode() const final { return OpCode::ElementwiseBinary; }
public:
const Param &param() const { return _param; }
@@ -59,4 +68,4 @@ private:
} // namespace ir
} // namespace onert
-#endif // __ONERT_IR_OPERATION_MUL_H__
+#endif // __ONERT_IR_OPERATION_ELEMENTWISEBINARY_H__
diff --git a/runtime/onert/core/include/ir/operation/MaxPool2D.h b/runtime/onert/core/include/ir/operation/ElementwiseUnary.h
index 300f7cb3c..c40778a56 100644
--- a/runtime/onert/core/include/ir/operation/MaxPool2D.h
+++ b/runtime/onert/core/include/ir/operation/ElementwiseUnary.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,14 +14,10 @@
* limitations under the License.
*/
-#ifndef __ONERT_IR_OPERATION_MAXPOOL2D_H__
-#define __ONERT_IR_OPERATION_MAXPOOL2D_H__
-
-#include <memory>
+#ifndef __ONERT_IR_OPERATION_ELEMENTWISEUNARY_H__
+#define __ONERT_IR_OPERATION_ELEMENTWISEUNARY_H__
#include "ir/Operation.h"
-#include "ir/InternalType.h"
-#include "ir/Padding.h"
namespace onert
{
@@ -30,7 +26,7 @@ namespace ir
namespace operation
{
-class MaxPool2D : public Operation
+class ElementwiseUnary : public Operation
{
public:
enum Input
@@ -38,22 +34,40 @@ public:
INPUT = 0
};
+ enum class Type
+ {
+ ABS,
+ CAST,
+ COS,
+ DEQUANTIZE,
+ ERF,
+ EXP,
+ FLOOR,
+ LOG,
+ LOGICAL_NOT,
+ NEG,
+ QUANTIZE,
+ ROUND,
+ RSQRT,
+ SIN,
+ SQRT,
+ SQURE,
+ ZEROS_LIKE
+ };
+
struct Param
{
- uint32_t kh;
- uint32_t kw;
- Stride stride;
- Padding padding;
- Activation activation;
+ Type op_type;
};
public:
- MaxPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
+ ElementwiseUnary(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
public:
void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::MaxPool2D; }
+ std::string name() const override;
+ OpCode opcode() const final { return OpCode::ElementwiseUnary; }
public:
const Param &param() const { return _param; }
@@ -66,4 +80,4 @@ private:
} // namespace ir
} // namespace onert
-#endif // __ONERT_IR_OPERATION_MAXPOOL2D_H__
+#endif // __ONERT_IR_OPERATION_ELEMENTWISEUNARY_H__
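
Each deleted single-input header (Abs, Cast, Cos, Dequantize, Exp, Floor, Log, LogicalNot, Neg, Quantize, Round, RSQRT, Sin, SQRT, ZerosLike) now maps to one Type value, so a backend needs one dispatch point rather than one class per op. A float-only standalone sketch of such a dispatch (simplified, not the cpu backend kernels):

    #include <cmath>
    #include <iostream>

    enum class UnaryType { ABS, COS, EXP, FLOOR, LOG, NEG, ROUND, RSQRT, SIN, SQRT };

    // One switch replaces a dozen nearly identical operation classes.
    float applyUnary(UnaryType t, float x)
    {
      switch (t)
      {
        case UnaryType::ABS:   return std::fabs(x);
        case UnaryType::COS:   return std::cos(x);
        case UnaryType::EXP:   return std::exp(x);
        case UnaryType::FLOOR: return std::floor(x);
        case UnaryType::LOG:   return std::log(x);
        case UnaryType::NEG:   return -x;
        case UnaryType::ROUND: return std::round(x);
        case UnaryType::RSQRT: return 1.0f / std::sqrt(x);
        case UnaryType::SIN:   return std::sin(x);
        case UnaryType::SQRT:  return std::sqrt(x);
      }
      return x;
    }

    int main() { std::cout << applyUnary(UnaryType::RSQRT, 4.0f) << '\n'; } // 0.5
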
diff --git a/runtime/onert/core/include/ir/operation/Exp.h b/runtime/onert/core/include/ir/operation/Exp.h
deleted file mode 100644
index 2e68ff07a..000000000
--- a/runtime/onert/core/include/ir/operation/Exp.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_EXP_H__
-#define __ONERT_IR_OPERATION_EXP_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Exp : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Exp(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Exp; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_EXP_H__
diff --git a/runtime/onert/core/include/ir/operation/Floor.h b/runtime/onert/core/include/ir/operation/Floor.h
deleted file mode 100644
index b34699c22..000000000
--- a/runtime/onert/core/include/ir/operation/Floor.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_FLOOR_H__
-#define __ONERT_IR_OPERATION_FLOOR_H__
-
-#include <memory>
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Floor : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Floor(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Floor; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_FLOOR_H__
diff --git a/runtime/onert/core/include/ir/operation/Log.h b/runtime/onert/core/include/ir/operation/Log.h
deleted file mode 100644
index a6e3ca3f6..000000000
--- a/runtime/onert/core/include/ir/operation/Log.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_LOG_H__
-#define __ONERT_IR_OPERATION_LOG_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Log : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Log(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Log; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_LOG_H__
diff --git a/runtime/onert/core/include/ir/operation/LogicalAnd.h b/runtime/onert/core/include/ir/operation/LogicalAnd.h
deleted file mode 100644
index dc853b6a9..000000000
--- a/runtime/onert/core/include/ir/operation/LogicalAnd.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_LOGICAL_AND_H__
-#define __ONERT_IR_OPERATION_LOGICAL_AND_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class LogicalAnd : public Operation
-{
-public:
- enum Input
- {
- INPUT0 = 0,
- INPUT1 = 1,
- };
-
-public:
- LogicalAnd(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::LogicalAnd; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_LOGICAL_AND_H__
diff --git a/runtime/onert/core/include/ir/operation/LogicalNot.h b/runtime/onert/core/include/ir/operation/LogicalNot.h
deleted file mode 100644
index 9519f6d47..000000000
--- a/runtime/onert/core/include/ir/operation/LogicalNot.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_LOGICAL_NOT_H__
-#define __ONERT_IR_OPERATION_LOGICAL_NOT_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class LogicalNot : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0,
- };
-
-public:
- LogicalNot(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::LogicalNot; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_LOGICAL_NOT_H__
diff --git a/runtime/onert/core/include/ir/operation/LogicalOr.h b/runtime/onert/core/include/ir/operation/LogicalOr.h
deleted file mode 100644
index c4b658cd9..000000000
--- a/runtime/onert/core/include/ir/operation/LogicalOr.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_LOGICAL_OR_H__
-#define __ONERT_IR_OPERATION_LOGICAL_OR_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class LogicalOr : public Operation
-{
-public:
- enum Input
- {
- INPUT0 = 0,
- INPUT1 = 1,
- };
-
-public:
- LogicalOr(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::LogicalOr; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_LOGICAL_OR_H__
diff --git a/runtime/onert/core/include/ir/operation/Logistic.h b/runtime/onert/core/include/ir/operation/Logistic.h
deleted file mode 100644
index 5421e1c84..000000000
--- a/runtime/onert/core/include/ir/operation/Logistic.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_LOGISTIC_H__
-#define __ONERT_IR_OPERATION_LOGISTIC_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Logistic : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Logistic(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Logistic; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_LOGISTIC_H__
diff --git a/runtime/onert/core/include/ir/operation/Max.h b/runtime/onert/core/include/ir/operation/Max.h
deleted file mode 100644
index df72d3ae9..000000000
--- a/runtime/onert/core/include/ir/operation/Max.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_MAX_H__
-#define __ONERT_IR_OPERATION_MAX_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Max : public Operation
-{
-public:
- enum Input
- {
- LHS = 0,
- RHS
- };
-
-public:
- Max(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Max; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_MAX_H__
diff --git a/runtime/onert/core/include/ir/operation/Mean.h b/runtime/onert/core/include/ir/operation/Mean.h
deleted file mode 100644
index ce2da908d..000000000
--- a/runtime/onert/core/include/ir/operation/Mean.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_MEAN_H__
-#define __ONERT_IR_OPERATION_MEAN_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Mean : public Operation
-{
-public:
- enum Input
- {
- INPUT,
- AXES
- };
-
- struct Param
- {
- bool keep_dims;
- };
-
-public:
- Mean(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Mean; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_MEAN_H__
diff --git a/runtime/onert/core/include/ir/operation/Min.h b/runtime/onert/core/include/ir/operation/Min.h
deleted file mode 100644
index 117301c00..000000000
--- a/runtime/onert/core/include/ir/operation/Min.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_MIN_H__
-#define __ONERT_IR_OPERATION_MIN_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Min : public Operation
-{
-public:
- enum Input
- {
- LHS = 0,
- RHS
- };
-
-public:
- Min(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Min; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_MIN_H__
diff --git a/runtime/onert/core/include/ir/operation/Neg.h b/runtime/onert/core/include/ir/operation/Neg.h
deleted file mode 100644
index f8123c485..000000000
--- a/runtime/onert/core/include/ir/operation/Neg.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_NEG_H__
-#define __ONERT_IR_OPERATION_NEG_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Neg : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Neg(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Neg; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_NEG_H__
diff --git a/runtime/onert/core/include/ir/operation/AvgPool2D.h b/runtime/onert/core/include/ir/operation/Pool2D.h
index d5b300a35..22425b4c2 100644
--- a/runtime/onert/core/include/ir/operation/AvgPool2D.h
+++ b/runtime/onert/core/include/ir/operation/Pool2D.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_IR_OPERATION_AVGPOOL2D_H__
-#define __ONERT_IR_OPERATION_AVGPOOL2D_H__
+#ifndef __ONERT_IR_OPERATION_POOL2D_H__
+#define __ONERT_IR_OPERATION_POOL2D_H__
#include <memory>
@@ -30,7 +30,7 @@ namespace ir
namespace operation
{
-class AvgPool2D : public Operation
+class Pool2D : public Operation
{
public:
enum Input
@@ -38,23 +38,31 @@ public:
INPUT = 0
};
+ enum class PoolType
+ {
+ AVG,
+ L2,
+ MAX,
+ };
+
struct Param
{
+ PoolType op_type;
uint32_t kh;
uint32_t kw;
-
Stride stride;
Padding padding;
Activation activation;
};
public:
- AvgPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
+ Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
public:
void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::AvgPool2D; }
+ std::string name() const override;
+ OpCode opcode() const final { return OpCode::Pool2D; }
public:
const Param &param() const { return _param; }
@@ -67,4 +75,4 @@ private:
} // namespace ir
} // namespace onert
-#endif // __ONERT_IR_OPERATION_AVGPOOL2D_H__
+#endif // __ONERT_IR_OPERATION_POOL2D_H__
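
AvgPool2D, MaxPool2D and L2Pool2D share window size, stride, padding and fused activation; only the reduction over each window differs, which is what PoolType selects. A standalone sketch of that per-window reduction (a simplified stand-in, not the backend pooling kernels):

    #include <algorithm>
    #include <cmath>
    #include <iostream>
    #include <vector>

    enum class PoolType { AVG, L2, MAX };

    // Reduce one pooling window; the surrounding loops over output positions
    // are identical for all three pool types, which is what Pool2D exploits.
    float poolWindow(PoolType type, const std::vector<float> &window)
    {
      switch (type)
      {
        case PoolType::MAX:
          return *std::max_element(window.begin(), window.end());
        case PoolType::AVG: {
          float sum = 0.f;
          for (float v : window) sum += v;
          return sum / window.size();
        }
        case PoolType::L2: {
          float sq = 0.f;
          for (float v : window) sq += v * v;
          return std::sqrt(sq / window.size());
        }
      }
      return 0.f;
    }

    int main()
    {
      std::vector<float> w{1.f, 2.f, 3.f, 4.f};
      std::cout << poolWindow(PoolType::AVG, w) << ' '
                << poolWindow(PoolType::MAX, w) << '\n'; // 2.5 4
    }
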
diff --git a/runtime/onert/core/include/ir/operation/Quantize.h b/runtime/onert/core/include/ir/operation/Quantize.h
deleted file mode 100644
index 2533ce432..000000000
--- a/runtime/onert/core/include/ir/operation/Quantize.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_QUANTIZE_H__
-#define __ONERT_IR_OPERATION_QUANTIZE_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Quantize : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0,
- };
-
-public:
- Quantize(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Quantize; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_QUANTIZE_H__
diff --git a/runtime/onert/core/include/ir/operation/RSQRT.h b/runtime/onert/core/include/ir/operation/RSQRT.h
deleted file mode 100644
index 64bb4f10a..000000000
--- a/runtime/onert/core/include/ir/operation/RSQRT.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_RSQRT_H__
-#define __ONERT_IR_OPERATION_RSQRT_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class RSQRT : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- RSQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::RSQRT; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_RSQRT_H__
diff --git a/runtime/onert/core/include/ir/operation/Round.h b/runtime/onert/core/include/ir/operation/Rank.h
index 44af0d861..2fd24ce23 100644
--- a/runtime/onert/core/include/ir/operation/Round.h
+++ b/runtime/onert/core/include/ir/operation/Rank.h
@@ -14,8 +14,10 @@
* limitations under the License.
*/
-#ifndef __ONERT_IR_OPERATION_ROUND_H__
-#define __ONERT_IR_OPERATION_ROUND_H__
+#ifndef __ONERT_IR_OPERATION_RANK_H__
+#define __ONERT_IR_OPERATION_RANK_H__
+
+#include <memory>
#include "ir/Operation.h"
@@ -26,7 +28,7 @@ namespace ir
namespace operation
{
-class Round : public Operation
+class Rank : public Operation
{
public:
enum Input
@@ -35,15 +37,15 @@ public:
};
public:
- Round(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+ Rank(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
public:
void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Round; }
+ OpCode opcode() const final { return OpCode::Rank; }
};
} // namespace operation
} // namespace ir
} // namespace onert
-#endif // __ONERT_IR_OPERATION_ROUND_H__
+#endif // __ONERT_IR_OPERATION_RANK_H__
diff --git a/runtime/onert/core/include/ir/operation/ReLU.h b/runtime/onert/core/include/ir/operation/ReLU.h
deleted file mode 100644
index 9eb0c091b..000000000
--- a/runtime/onert/core/include/ir/operation/ReLU.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_RELU_H__
-#define __ONERT_IR_OPERATION_RELU_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class ReLU : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- ReLU(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::ReLU; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_RELU_H__
diff --git a/runtime/onert/core/include/ir/operation/ReLU1.h b/runtime/onert/core/include/ir/operation/ReLU1.h
deleted file mode 100644
index 134ee573a..000000000
--- a/runtime/onert/core/include/ir/operation/ReLU1.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_ReLU1_H__
-#define __ONERT_IR_OPERATION_ReLU1_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class ReLU1 : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- ReLU1(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::ReLU1; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_ReLU1_H__
diff --git a/runtime/onert/core/include/ir/operation/ReLU6.h b/runtime/onert/core/include/ir/operation/ReLU6.h
deleted file mode 100644
index e658c4925..000000000
--- a/runtime/onert/core/include/ir/operation/ReLU6.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_ReLU6_H__
-#define __ONERT_IR_OPERATION_ReLU6_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class ReLU6 : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- ReLU6(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::ReLU6; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_ReLU6_H__
diff --git a/runtime/onert/core/include/ir/operation/L2Pool2D.h b/runtime/onert/core/include/ir/operation/ResizeNearestNeighbor.h
index d369fd5fc..e4d810eeb 100644
--- a/runtime/onert/core/include/ir/operation/L2Pool2D.h
+++ b/runtime/onert/core/include/ir/operation/ResizeNearestNeighbor.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,14 +14,12 @@
* limitations under the License.
*/
-#ifndef __ONERT_IR_OPERATION_L2_POOL_2D_H__
-#define __ONERT_IR_OPERATION_L2_POOL_2D_H__
+#ifndef __ONERT_IR_OPERATION_RESIZE_NEAREST_NEIGHBOR_H__
+#define __ONERT_IR_OPERATION_RESIZE_NEAREST_NEIGHBOR_H__
#include <memory>
#include "ir/Operation.h"
-#include "ir/InternalType.h"
-#include "ir/Padding.h"
namespace onert
{
@@ -30,7 +28,7 @@ namespace ir
namespace operation
{
-class L2Pool2D : public Operation
+class ResizeNearestNeighbor : public Operation
{
public:
enum Input
@@ -40,20 +38,18 @@ public:
struct Param
{
- Padding padding;
- Stride stride;
- uint32_t kw;
- uint32_t kh;
- Activation activation;
+ int32_t height_out;
+ int32_t width_out;
+ bool align_corners;
};
public:
- L2Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param);
+ ResizeNearestNeighbor(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
public:
void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::L2Pool2D; }
+ OpCode opcode() const final { return OpCode::ResizeNearestNeighbor; }
public:
const Param &param() const { return _param; }
@@ -66,4 +62,4 @@ private:
} // namespace ir
} // namespace onert
-#endif // __ONERT_IR_OPERATION_L2_POOL_2D_H__
+#endif // __ONERT_IR_OPERATION_RESIZE_NEAREST_NEIGHBOR_H__
diff --git a/runtime/onert/core/include/ir/operation/SQRT.h b/runtime/onert/core/include/ir/operation/SQRT.h
deleted file mode 100644
index 8563b1ab1..000000000
--- a/runtime/onert/core/include/ir/operation/SQRT.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_SQRT_H__
-#define __ONERT_IR_OPERATION_SQRT_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class SQRT : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- SQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::SQRT; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_SQRT_H__
diff --git a/runtime/onert/core/include/ir/operation/Select.h b/runtime/onert/core/include/ir/operation/Select.h
index 400ac9d3e..33bf67886 100644
--- a/runtime/onert/core/include/ir/operation/Select.h
+++ b/runtime/onert/core/include/ir/operation/Select.h
@@ -41,7 +41,7 @@ public:
public:
void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Abs; }
+ OpCode opcode() const final { return OpCode::Select; }
};
} // namespace operation
diff --git a/runtime/onert/core/include/ir/operation/Sin.h b/runtime/onert/core/include/ir/operation/Sin.h
deleted file mode 100644
index aef44ab2e..000000000
--- a/runtime/onert/core/include/ir/operation/Sin.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_SIN_H__
-#define __ONERT_IR_OPERATION_SIN_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Sin : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Sin(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Sin; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_SIN_H__
diff --git a/runtime/onert/core/include/ir/operation/Sub.h b/runtime/onert/core/include/ir/operation/Sub.h
deleted file mode 100644
index 0674e6e4d..000000000
--- a/runtime/onert/core/include/ir/operation/Sub.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_SUB_H__
-#define __ONERT_IR_OPERATION_SUB_H__
-
-#include "ir/Operation.h"
-#include "ir/InternalType.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Sub : public Operation
-{
-public:
- enum Input
- {
- LHS = 0,
- RHS
- };
-
- struct Param
- {
- Activation activation;
- };
-
-public:
- Sub(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Sub; }
-
-public:
- const Param &param() const { return _param; }
-
-private:
- Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_SUB_H__
diff --git a/runtime/onert/core/include/ir/operation/Tanh.h b/runtime/onert/core/include/ir/operation/Tanh.h
deleted file mode 100644
index 9b8d03bca..000000000
--- a/runtime/onert/core/include/ir/operation/Tanh.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_TANH_H__
-#define __ONERT_IR_OPERATION_TANH_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Tanh : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- Tanh(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Tanh; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_TANH_H__
diff --git a/runtime/onert/core/include/ir/operation/ZerosLike.h b/runtime/onert/core/include/ir/operation/ZerosLike.h
deleted file mode 100644
index 7c2851858..000000000
--- a/runtime/onert/core/include/ir/operation/ZerosLike.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_ZEROS_LIKE_H__
-#define __ONERT_IR_OPERATION_ZEROS_LIKE_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class ZerosLike : public Operation
-{
-public:
- enum Input
- {
- INPUT = 0
- };
-
-public:
- ZerosLike(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
- void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::ZerosLike; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_ZEROS_LIKE_H__
diff --git a/runtime/onert/core/include/util/Config.lst b/runtime/onert/core/include/util/Config.lst
index 1718e034c..5077fad69 100644
--- a/runtime/onert/core/include/util/Config.lst
+++ b/runtime/onert/core/include/util/Config.lst
@@ -20,7 +20,7 @@
// Name | Type | Default
CONFIG(GRAPH_DOT_DUMP , int , "0")
-CONFIG(BACKENDS , std::string , "cpu;acl_cl;acl_neon")
+CONFIG(BACKENDS , std::string , "cpu;acl_cl;acl_neon;bcq") // FIXME Remove bcq
CONFIG(OP_BACKEND_ALLOPS , std::string , "")
CONFIG(OP_BACKEND_MAP , std::string , "")
CONFIG(DISABLE_COMPILE , bool , "0")
diff --git a/runtime/onert/backend/cpu/ops/ExpLayer.h b/runtime/onert/core/include/util/Exceptions.h
index cd27b0e40..fc3fa0f64 100644
--- a/runtime/onert/backend/cpu/ops/ExpLayer.h
+++ b/runtime/onert/core/include/util/Exceptions.h
@@ -14,44 +14,35 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CPU_OPS_EXPLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_EXPLAYER_H__
+#ifndef __ONERT_UTIL_ONERTEXCEPTION_H__
+#define __ONERT_UTIL_ONERTEXCEPTION_H__
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
+#include <string>
namespace onert
{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-class ExpLayer : public ::onert::exec::IFunction
+class OnertException : public std::exception
{
public:
- ExpLayer();
-
-public:
- void expFloat32();
+ OnertException(const std::string &msg) : _msg{msg} {}
+ OnertException(const std::string &tag, const std::string &msg) : _msg{tag + " : " + msg} {}
- void expQuant8();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
+ const char *what() const noexcept override { return _msg.c_str(); }
private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
+ std::string _msg;
+};
+
+class InsufficientBufferSizeException : public OnertException
+{
+public:
+ InsufficientBufferSizeException(const std::string &msg)
+ : OnertException{"InsufficientBufferSize", msg}
+ {
+ }
};
-} // namespace ops
-} // namespace cpu
-} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CPU_OPS_EXPLAYER_H__
+#endif // __ONERT_UTIL_ONERTEXCEPTION_H__
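
OnertException carries a tag-prefixed message, and InsufficientBufferSizeException lets callers tell "user output buffer too small for a dynamic shape" apart from other failures (the DynamicTensorManager change below throws it). A minimal caller-side sketch, assuming util/Exceptions.h from this patch is on the include path; resizeOutput() is a hypothetical helper used only for illustration:

    #include <iostream>

    #include "util/Exceptions.h"

    // Hypothetical helper standing in for an execution step that can fail
    // when a user-provided output buffer is smaller than the inferred shape.
    void resizeOutput()
    {
      throw onert::InsufficientBufferSizeException{"output needs 4096 bytes, got 1024"};
    }

    int main()
    {
      try
      {
        resizeOutput();
      }
      catch (const onert::InsufficientBufferSizeException &e)
      {
        // Recoverable for the caller: retry with a larger output buffer.
        std::cerr << e.what() << '\n';
      }
      catch (const onert::OnertException &e)
      {
        std::cerr << "onert error: " << e.what() << '\n';
      }
    }
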
diff --git a/runtime/onert/core/include/util/ShapeInference.h b/runtime/onert/core/include/util/ShapeInference.h
index a68c22b16..1ebed48f2 100644
--- a/runtime/onert/core/include/util/ShapeInference.h
+++ b/runtime/onert/core/include/util/ShapeInference.h
@@ -19,15 +19,13 @@
#include "Utils.h"
-#include "ir/operation/AvgPool2D.h"
#include "ir/operation/Concat.h"
-#include "ir/operation/MaxPool2D.h"
#include "ir/operation/Conv2D.h"
#include "ir/operation/DepthwiseConv2D.h"
+#include "ir/operation/Pool2D.h"
#include "ir/operation/Reshape.h"
-#include "ir/operation/RSQRT.h"
#include "ir/operation/StridedSlice.h"
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
#include "ir/Index.h"
#include "ir/Layout.h"
#include "ir/OperationVisitor.h"
@@ -46,8 +44,6 @@ using Shapes = std::vector<ir::Shape>;
ir::Shape inferArgMaxShape(const ir::Shape &input_shape, int axis, int rank);
-ir::Shape inferAvgPoolShape(const ir::Shape &in_shape, const ir::operation::AvgPool2D::Param &param,
- ir::Layout layout = ir::Layout::NHWC);
ir::Shape inferBatchMatMulShape(const ir::Shape &lhs_shape, const ir::Shape &rhs_shape,
const ir::operation::BatchMatMul::Param &param);
@@ -74,15 +70,15 @@ ir::Shape inferFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &k
ir::Shape inferGatherShape(const ir::Shape &input_shape, const ir::Shape &indices_shape, int axis,
int rank);
-ir::Shape inferMaxPoolShape(const ir::Shape &in_shape, const ir::operation::MaxPool2D::Param &param,
- ir::Layout layout = ir::Layout::NHWC);
-
ir::Shape inferOnehotShape(const ir::Shape &input_shape, const int depth, int axis);
ir::Shape inferPackShape(const ir::Shape &input_shape, int axis, int rank, int num);
ir::Shape inferPadShape(const ir::Shape &in_shape, const int32_t *pad_buf, const size_t num_pads);
+ir::Shape inferPoolShape(const ir::Shape &in_shape, const ir::operation::Pool2D::Param &param,
+ ir::Layout layout = ir::Layout::NHWC);
+
template <typename T> ir::Shape inferRangeShape(T start_val, T limit_val, T delta_val);
ir::Shape inferReshapeShape(const int32_t *shape_buf, const int32_t shape_num_elements,
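
inferAvgPoolShape and inferMaxPoolShape merge into inferPoolShape because the output extent depends only on window size, stride and padding, never on the pooling type. A standalone per-axis sketch of that arithmetic for VALID and SAME padding (a simplified stand-in, not the onert signature):

    #include <iostream>

    // Output extent along one spatial axis for a pooling (or conv) window.
    int pooledExtent(int in, int k, int stride, bool same_padding)
    {
      if (same_padding)
        return (in + stride - 1) / stride; // SAME: ceil(in / stride)
      return (in - k) / stride + 1;        // VALID: no padding
    }

    int main()
    {
      std::cout << pooledExtent(/*in=*/112, /*k=*/3, /*stride=*/2, /*same=*/false) << ' '
                << pooledExtent(112, 3, 2, /*same=*/true) << '\n'; // 55 56
    }
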
diff --git a/runtime/onert/core/src/backend/controlflow/Backend.h b/runtime/onert/core/src/backend/controlflow/Backend.h
index 3c7325912..670f7750f 100644
--- a/runtime/onert/core/src/backend/controlflow/Backend.h
+++ b/runtime/onert/core/src/backend/controlflow/Backend.h
@@ -21,6 +21,7 @@
#include "ConstantInitializer.h"
#include "KernelGenerator.h"
#include "TensorBuilder.h"
+#include "Tensor.h"
#include <backend/Backend.h>
@@ -63,10 +64,12 @@ public:
// there is no such case for now; let's support it later
// TODO Remove TensorBuilder and ConstantInitializer
// TODO Support consecutive controlflow operations' intermediate tensors
- auto tb = std::make_shared<TensorBuilder>();
+ auto tr = std::make_shared<TensorRegistry>();
+ auto tb = std::make_shared<TensorBuilder>(tr);
+ context->tensor_registry = tr;
context->tensor_builder = tb;
- context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb);
- context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb);
+ context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
+ context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb->dynamicTensorManager(), tr);
context->tensor_register = nullptr;
context->optimizer = nullptr;
return context;
diff --git a/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h b/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h
index 35cc7835e..e21a8f357 100644
--- a/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h
+++ b/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h
@@ -17,7 +17,7 @@
#ifndef __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__
#define __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__
-#include "TensorBuilder.h"
+#include "TensorRegistry.h"
#include <backend/IConstantInitializer.h>
#include <ir/Operands.h>
@@ -33,16 +33,16 @@ class ConstantInitializer : public IConstantInitializer
{
public:
ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
- : IConstantInitializer{operands}, _tensor_builder{tensor_builder}
+ const std::shared_ptr<ITensorRegistry> &tensor_reg)
+ : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
{
}
private:
- std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
+ std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; }
private:
- std::shared_ptr<TensorBuilder> _tensor_builder;
+ std::shared_ptr<ITensorRegistry> _tensor_reg;
};
} // namespace controlflow
diff --git a/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc b/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc
index e538f3fd3..1288e4c96 100644
--- a/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc
+++ b/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc
@@ -17,6 +17,8 @@
#include "DynamicTensorManager.h"
#include "util/logging.h"
+#include "util/Exceptions.h"
+#include "ir/DataType.h"
namespace onert
{
@@ -25,10 +27,8 @@ namespace backend
namespace controlflow
{
-DynamicTensorManager::DynamicTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> &reg,
- const std::shared_ptr<UserTensorRegistry> &user_reg)
- : _dynamic_mem_mgr{new cpu_common::DynamicMemoryManager()}, _tensors{reg},
- _user_tensors{user_reg}
+DynamicTensorManager::DynamicTensorManager(const std::shared_ptr<TensorRegistry> &tensors)
+ : _dynamic_mem_mgr{new cpu_common::DynamicMemoryManager()}, _tensors{tensors}
{
// DO NOTHING
}
@@ -36,20 +36,20 @@ DynamicTensorManager::DynamicTensorManager(const std::shared_ptr<cpu_common::Ten
void DynamicTensorManager::applyShape(const ir::OperandIndex &ind, const ir::Shape &new_shape)
{
// NOTE Handle user tensors first
- auto user_tensor = _user_tensors->getNativeTensor(ind);
+ auto user_tensor = _tensors->getNativeUserTensor(ind);
if (user_tensor)
{
// User tensors cannot be reallocated.
auto buffer_size = user_tensor->total_size();
auto new_size = new_shape.num_elements() * sizeOfDataType(user_tensor->data_type());
if (buffer_size < new_size)
- throw std::runtime_error{"ExecutorBase: output buffer size is less than output tensor size"};
+ throw InsufficientBufferSizeException{"Output buffer size is less than output tensor size"};
user_tensor->setShape(new_shape);
return;
}
- // NOTE Then handle native tensors
- auto tensor = _tensors->getNativeTensor(ind);
+ // NOTE Then handle own tensors
+ auto tensor = _tensors->getNativeOwnTensor(ind);
assert(tensor);
bool previously_dynamic = tensor->is_dynamic();
@@ -102,24 +102,13 @@ void DynamicTensorManager::buildTensor(const ir::OperandIndex &ind,
const ir::OperandInfo &tensor_info,
ir::Layout backend_layout)
{
- assert(_tensors->getNativeTensor(ind) == nullptr);
auto tensor = std::make_shared<cpu_common::Tensor>(tensor_info, backend_layout, this);
- _tensors->setNativeTensor(ind, tensor);
+ _tensors->setNativeOwnTensor(ind, tensor);
}
void DynamicTensorManager::planDealloc(ir::OperationIndex op_ind, ir::OperandIndex operand_ind)
{
- auto find = _dealloc_tensor_map.find(op_ind);
- if (find != _dealloc_tensor_map.end())
- {
- auto &input_set = find->second;
- input_set.emplace(operand_ind);
- }
- else
- {
- _dealloc_tensor_map.emplace(
- std::make_pair(op_ind, std::unordered_set<ir::OperandIndex>{operand_ind}));
- }
+ _dealloc_tensor_map[op_ind].emplace(operand_ind);
}
void DynamicTensorManager::deallocInput(ir::OperationIndex op_ind)
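The planDealloc rewrite above leans on std::unordered_map::operator[] value-initializing a missing mapped set, which is why the explicit find/emplace branching can go. A small stand-alone illustration (key and value types here are placeholders):

    #include <cassert>
    #include <unordered_map>
    #include <unordered_set>

    int main()
    {
      std::unordered_map<int, std::unordered_set<int>> dealloc_map;
      // First access default-constructs the set, later accesses reuse it, so the
      // "new key" and "existing key" cases need no separate handling.
      dealloc_map[7].emplace(1);
      dealloc_map[7].emplace(2);
      assert(dealloc_map.at(7).size() == 2);
      return 0;
    }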
diff --git a/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h b/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h
index 446427d64..dbe388ba2 100644
--- a/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h
+++ b/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h
@@ -17,11 +17,11 @@
#ifndef __ONERT_BACKEND_CONTROLFLOW_DYNAMICTENSOR_MANAGER_H__
#define __ONERT_BACKEND_CONTROLFLOW_DYNAMICTENSOR_MANAGER_H__
-#include "UserTensorRegistry.h"
+#include "TensorRegistry.h"
+#include "Tensor.h"
#include <backend/IDynamicTensorManager.h>
#include <backend/cpu_common/MemoryManager.h>
-#include <backend/cpu_common/TensorRegistry.h>
#include <ir/OperandInfo.h>
#include <ir/Operation.h>
#include <ir/Index.h>
@@ -33,16 +33,13 @@ namespace backend
namespace controlflow
{
-// TODO Find optimized algorithm to manage memory.
-
/**
* @brief Class to manage dynamic tensor and its memory
*/
class DynamicTensorManager : public backend::IDynamicTensorManager
{
public:
- DynamicTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> &reg,
- const std::shared_ptr<UserTensorRegistry> &user_reg);
+ DynamicTensorManager(const std::shared_ptr<TensorRegistry> &tensors);
virtual ~DynamicTensorManager() = default;
@@ -61,9 +58,7 @@ private:
* @todo DynamicMemoryManager is not optimized. Optimized one is needed
*/
std::shared_ptr<cpu_common::DynamicMemoryManager> _dynamic_mem_mgr;
- // TODO Refactoring : Merge two TensorRegistries into one
- const std::shared_ptr<cpu_common::TensorRegistry> _tensors;
- const std::shared_ptr<UserTensorRegistry> _user_tensors;
+ const std::shared_ptr<TensorRegistry> _tensors;
// contains list of dynamic tensor index, which can be deallocated after running operation
// note: this map could contain static tensor index too. Careful use is required.
diff --git a/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc b/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc
index eb83b7de4..de5a6a5f6 100644
--- a/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc
+++ b/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc
@@ -31,24 +31,24 @@ namespace backend
namespace controlflow
{
-KernelGenerator::KernelGenerator(const ir::Graph &graph,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
- : _graph{graph}, _tensor_builder{tensor_builder}, _tensor_builder_set{}, _executor_map{nullptr}
+KernelGenerator::KernelGenerator(const ir::Graph &graph, IDynamicTensorManager *dyn_tensor_manager,
+ const std::shared_ptr<TensorRegistry> &tensor_reg)
+ : _graph{graph}, _dyn_tensor_manager{dyn_tensor_manager}, _tensor_reg{tensor_reg},
+ _tensor_registries{}, _executor_map{nullptr}
{
UNUSED_RELEASE(_graph);
- UNUSED_RELEASE(_tensor_builder_set);
+ UNUSED_RELEASE(_tensor_registries);
UNUSED_RELEASE(_executor_map);
}
void KernelGenerator::visit(const ir::OpSequence &op_seq)
{
assert(!_return_fn_seq);
- assert(_tensor_builder->dynamicTensorManager());
- assert(_tensor_builder->tensorRegistry());
+ assert(_dyn_tensor_manager);
+ assert(_tensor_reg);
- auto dyn_tensor_manager = _tensor_builder->dynamicTensorManager();
- auto dyn_shape_inferer = std::make_unique<exec::DynamicShapeInferer>(
- _graph.operands(), dyn_tensor_manager, _tensor_builder->tensorRegistry());
+ auto dyn_shape_inferer =
+ std::make_unique<exec::DynamicShapeInferer>(_graph.operands(), _tensor_reg);
_return_fn_seq = std::make_unique<exec::FunctionSequence>();
@@ -58,8 +58,8 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
dyn_ctx->op_seq = &op_seq;
dyn_ctx->operations = &_graph.operations();
dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
- dyn_ctx->tensor_registry = _tensor_builder->tensorRegistry();
- dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();
+ dyn_ctx->tensor_registry = _tensor_reg;
+ dyn_ctx->dynamic_tensor_manager = _dyn_tensor_manager;
_return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
}
@@ -93,12 +93,7 @@ void KernelGenerator::visit(const ir::operation::If &node)
auto output_tensor = getTensor(output_index);
output_tensors.emplace_back(output_tensor);
- const auto output_tensor_builder = getTensorBuilder(output_index);
- if (output_tensor_builder->supportDynamicTensor())
- {
- auto output_dyn_manager = output_tensor_builder->dynamicTensorManager();
- outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index, output_dyn_manager};
- }
+ outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index};
}
// IfLayer just set ExecutorMap instead of then and else executor to avoid complexity of
@@ -121,14 +116,7 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
std::vector<std::shared_ptr<ITensor>> output_tensors{getTensor(output_index)};
std::vector<std::shared_ptr<ITensor>> input_tensors{getTensor(input_index)};
std::unordered_map<std::shared_ptr<ITensor>, exec::DynAllocInfo> outputs_dyn_alloc_info;
- const auto output_tensor_builder = getTensorBuilder(output_index);
- VERBOSE(PERMUTE_FIND_TB) << output_index << " -> " << output_tensor_builder.get() << std::endl;
- assert(output_tensor_builder != nullptr);
- if (output_tensor_builder->supportDynamicTensor())
- {
- outputs_dyn_alloc_info[output_tensors.at(0)] =
- exec::DynAllocInfo{output_index, output_tensor_builder->dynamicTensorManager()};
- }
+ outputs_dyn_alloc_info[output_tensors.at(0)] = exec::DynAllocInfo{output_index};
auto fn =
std::make_unique<kernel::PermuteLayer>(input_tensors, output_tensors, outputs_dyn_alloc_info);
@@ -159,12 +147,7 @@ void KernelGenerator::visit(const ir::operation::While &node)
output_tensors.emplace_back(output_tensor);
- const auto output_tensor_builder = getTensorBuilder(output_index);
- if (output_tensor_builder->supportDynamicTensor())
- {
- auto output_dyn_manager = output_tensor_builder->dynamicTensorManager();
- outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index, output_dyn_manager};
- }
+ outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index};
}
// WhileLayer just set ExecutorMap instead of cond and body executor to avoid complexity of
@@ -178,34 +161,7 @@ void KernelGenerator::visit(const ir::operation::While &node)
std::shared_ptr<backend::ITensor> KernelGenerator::getTensor(const ir::OperandIndex &index)
{
- std::shared_ptr<backend::ITensor> ret;
- for (auto tensor_builder : _tensor_builder_set)
- {
- auto tensor = tensor_builder->tensorAt(index);
- if (tensor)
- {
- ret = tensor;
- break;
- }
- }
- assert(ret != nullptr);
- return ret;
-}
-
-std::shared_ptr<backend::ITensorBuilder>
-KernelGenerator::getTensorBuilder(const ir::OperandIndex &index)
-{
- std::shared_ptr<backend::ITensorBuilder> ret;
- for (auto tensor_builder : _tensor_builder_set)
- {
- auto reg = tensor_builder->tensorRegistry();
- auto tensor = reg ? reg->getNativeITensor(index) : tensor_builder->tensorAt(index);
- if (tensor)
- {
- ret = tensor_builder;
- break;
- }
- }
+ std::shared_ptr<backend::ITensor> ret = _tensor_registries.getITensor(index);
assert(ret != nullptr);
return ret;
}
diff --git a/runtime/onert/core/src/backend/controlflow/KernelGenerator.h b/runtime/onert/core/src/backend/controlflow/KernelGenerator.h
index 1fc77935c..b84a810e4 100644
--- a/runtime/onert/core/src/backend/controlflow/KernelGenerator.h
+++ b/runtime/onert/core/src/backend/controlflow/KernelGenerator.h
@@ -22,9 +22,8 @@
#include <exec/IExecutor.h>
#include <ir/Graph.h>
#include "TensorBuilder.h"
-#include "compiler/TensorBuilders.h"
-
-#include "compiler/TensorBuilders.h"
+#include "compiler/TensorRegistries.h"
+#include "TensorRegistry.h"
namespace onert
{
@@ -36,11 +35,12 @@ namespace controlflow
class KernelGenerator : public IKernelGenerator
{
public:
- KernelGenerator(const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder);
+ KernelGenerator(const ir::Graph &graph, IDynamicTensorManager *dyn_tensor_manager,
+ const std::shared_ptr<TensorRegistry> &tensor_reg);
- void setTensorBuilderSet(const compiler::TensorBuilders &tensor_builder_set)
+ void setTensorRegistries(const compiler::TensorRegistries &tensor_registries)
{
- _tensor_builder_set = tensor_builder_set;
+ _tensor_registries = tensor_registries;
}
void setExecutorMap(const std::shared_ptr<exec::ExecutorMap> &executor_map)
{
@@ -57,12 +57,12 @@ public:
private:
std::shared_ptr<backend::ITensor> getTensor(const ir::OperandIndex &index);
- std::shared_ptr<backend::ITensorBuilder> getTensorBuilder(const ir::OperandIndex &index);
private:
const ir::Graph &_graph;
- std::shared_ptr<TensorBuilder> _tensor_builder;
- compiler::TensorBuilders _tensor_builder_set;
+ IDynamicTensorManager *_dyn_tensor_manager;
+ std::shared_ptr<TensorRegistry> _tensor_reg;
+ compiler::TensorRegistries _tensor_registries;
exec::ExecutorMap *_executor_map;
};
diff --git a/runtime/onert/core/src/ir/operation/Log.cc b/runtime/onert/core/src/backend/controlflow/Tensor.h
index 85598bc87..ba5bafd75 100644
--- a/runtime/onert/core/src/ir/operation/Log.cc
+++ b/runtime/onert/core/src/backend/controlflow/Tensor.h
@@ -14,26 +14,22 @@
* limitations under the License.
*/
-#include "ir/operation/Log.h"
+#ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_H__
+#define __ONERT_BACKEND_CONTROLFLOW_TENSOR_H__
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
+#include <backend/cpu_common/Tensor.h>
namespace onert
{
-namespace ir
+namespace backend
{
-namespace operation
+namespace controlflow
{
-void Log::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Log::Log(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
+using Tensor = cpu_common::Tensor;
-} // namespace operation
-} // namespace ir
+} // namespace controlflow
+} // namespace backend
} // namespace onert
+
+#endif // __ONERT_BACKEND_CONTROLFLOW_TENSOR_H__
diff --git a/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc b/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc
index 5bddb9185..e5c3f5fd5 100644
--- a/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc
+++ b/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc
@@ -27,10 +27,10 @@ namespace backend
namespace controlflow
{
-TensorBuilder::TensorBuilder()
- : _tensor_reg{new cpu_common::TensorRegistry()}, _user_tensor_reg{new UserTensorRegistry()},
- _static_tensor_mgr{new cpu_common::StaticTensorManager(_tensor_reg)},
- _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg, _user_tensor_reg)}
+TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg)
+ : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)},
+ _static_tensor_mgr{
+ new cpu_common::StaticTensorManager(_tensor_reg->base_reg(), _dynamic_tensor_mgr.get())}
{
/* empty */
}
@@ -54,10 +54,13 @@ void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::Op
void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
{
- assert(_tensor_info_map.find(ind) != _tensor_info_map.end());
+ // TODO Enhance the way of checking user tensors
+ if (_tensor_info_map.find(ind) == _tensor_info_map.end()) // Do not proceed for user tensors
+ return;
+
const auto tensor_info = _tensor_info_map.at(ind);
- if (!at(ind)->is_dynamic())
+ if (!nativeOwnTensorAt(ind)->is_dynamic())
{
const auto size = tensor_info.total_size();
_static_tensor_mgr->claimPlan(ind, size);
@@ -66,7 +69,11 @@ void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
{
- if (!at(ind)->is_dynamic())
+ // TODO Enhance the way of checking user tensors
+ if (_tensor_info_map.find(ind) == _tensor_info_map.end()) // Do not proceed for user tensors
+ return;
+
+ if (!nativeOwnTensorAt(ind)->is_dynamic())
{
_static_tensor_mgr->releasePlan(ind);
}
@@ -74,6 +81,11 @@ void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
{
+ // User tensors are not registered in _tensor_info_map, but objects for them do exist
+ // in the tensor registry.
+ // TODO Enhance the way of checking user tensors
+ if (_tensor_reg->getITensor(ind))
+ return true;
return _tensor_info_map.find(ind) != _tensor_info_map.end();
}
@@ -89,25 +101,9 @@ void TensorBuilder::allocate()
// This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation.
}
-std::shared_ptr<ITensor> TensorBuilder::tensorAt(const ir::OperandIndex &ind)
-{
- // NOTE Find from User Tensor Registry first
- // FIXME There may be both user tensor and native tensor for a `ind` which is a waste
- auto user_tensor = _user_tensor_reg->getITensor(ind);
- auto tensor = _tensor_reg->getITensor(ind);
- if (user_tensor)
- {
- return user_tensor;
- }
- else
- return tensor;
-}
-
-void TensorBuilder::iterate(const IterateFunction &fn) { _static_tensor_mgr->iterate(fn); }
-
-std::shared_ptr<cpu_common::Tensor> TensorBuilder::at(const ir::OperandIndex &ind)
+std::shared_ptr<cpu_common::Tensor> TensorBuilder::nativeOwnTensorAt(const ir::OperandIndex &ind)
{
- return _tensor_reg->getNativeTensor(ind);
+ return _tensor_reg->getNativeOwnTensor(ind);
}
std::unique_ptr<ITensorManager> TensorBuilder::releaseStaticTensorManager(void)
@@ -120,10 +116,10 @@ std::unique_ptr<ITensorManager> TensorBuilder::releaseDynamicTensorManager(void)
return std::move(_dynamic_tensor_mgr);
}
-void TensorBuilder::setUserTensor(const ir::OperandIndex &ind,
- const std::shared_ptr<UserTensor> &tensor)
+void TensorBuilder::setNativeUserTensor(const ir::OperandIndex &ind,
+ const std::shared_ptr<UserTensor> &tensor)
{
- _user_tensor_reg->setNativeTensor(ind, tensor);
+ _tensor_reg->setNativeUserTensor(ind, tensor);
}
} // namespace controlflow
diff --git a/runtime/onert/core/src/backend/controlflow/TensorBuilder.h b/runtime/onert/core/src/backend/controlflow/TensorBuilder.h
index 9f2bb3754..2f2a2c47e 100644
--- a/runtime/onert/core/src/backend/controlflow/TensorBuilder.h
+++ b/runtime/onert/core/src/backend/controlflow/TensorBuilder.h
@@ -39,9 +39,7 @@ namespace controlflow
class TensorBuilder : public ITensorBuilder
{
public:
- TensorBuilder();
-
- bool supportDynamicTensor() override { return true; }
+ TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg);
/**
* @brief Register tensor information to allocate on CPU backend
@@ -61,15 +59,6 @@ public:
void allocate() override;
void postFunctionPrepare() override { /* DO NOTHING */}
- /**
- * @brief Get tensor with a specific OperandIndex
- *
- * @return shared_ptr<ITensor> if a tensor with given OperandIndex exists. nullptr otherwise.
- */
- std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) override;
-
- void iterate(const IterateFunction &fn) override;
-
std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) override;
IDynamicTensorManager *dynamicTensorManager(void) override { return _dynamic_tensor_mgr.get(); }
@@ -82,16 +71,13 @@ public:
* If not, program will crash with assert or exception.
* @return shared_ptr<operand::Tensor>
*/
- std::shared_ptr<cpu_common::Tensor> at(const ir::OperandIndex &ind);
- void setUserTensor(const ir::OperandIndex &ind, const std::shared_ptr<UserTensor> &tensor);
-
- std::shared_ptr<ITensorRegistry> tensorRegistry() override { return _tensor_reg; }
+ std::shared_ptr<cpu_common::Tensor> nativeOwnTensorAt(const ir::OperandIndex &ind);
+ void setNativeUserTensor(const ir::OperandIndex &ind, const std::shared_ptr<UserTensor> &tensor);
private:
- const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
- const std::shared_ptr<UserTensorRegistry> _user_tensor_reg;
- std::unique_ptr<cpu_common::StaticTensorManager> _static_tensor_mgr;
+ const std::shared_ptr<TensorRegistry> _tensor_reg;
std::unique_ptr<DynamicTensorManager> _dynamic_tensor_mgr;
+ std::unique_ptr<cpu_common::StaticTensorManager> _static_tensor_mgr;
ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
ir::OperandIndexMap<ir::Layout> _tensor_layout_map;
};
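One detail worth noting in the member reordering above: non-static members are initialized in declaration order, so _dynamic_tensor_mgr must now be declared before _static_tensor_mgr because the static manager's constructor receives _dynamic_tensor_mgr.get(). A reduced sketch of the same constraint, with stand-in types:

    #include <memory>

    struct BuilderLike
    {
      // Initialization follows declaration order, not init-list order, so the "dynamic"
      // member is fully constructed before the "static" one that captures a pointer to it.
      BuilderLike()
        : _dynamic{std::make_unique<int>(42)}, _static{std::make_unique<int *>(_dynamic.get())}
      {
      }
      std::unique_ptr<int> _dynamic;  // declared first -> constructed first
      std::unique_ptr<int *> _static; // safe to reference _dynamic.get()
    };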
diff --git a/runtime/onert/core/src/backend/controlflow/TensorRegistry.h b/runtime/onert/core/src/backend/controlflow/TensorRegistry.h
new file mode 100644
index 000000000..678c5b73b
--- /dev/null
+++ b/runtime/onert/core/src/backend/controlflow/TensorRegistry.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_REGISTRY_H__
+#define __ONERT_BACKEND_CONTROLFLOW_TENSOR_REGISTRY_H__
+
+#include "backend/cpu_common/TensorRegistry.h"
+#include "backend/ITensorRegistry.h"
+#include "Tensor.h"
+#include "UserTensor.h"
+#include <assert.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace controlflow
+{
+
+/**
+ * @brief Tensor registry class for controlflow backend
+ *
+ * This class contains three types of tensors: two kinds of native tensors (tensors that are
+ * managed by this backend) and migrant tensors.
+ *
+ * - NativeUserTensor - @c UserTensor managed by this backend, buffer is user-given
+ * - NativeOwnTensor - @c cpu_common::Tensor managed by this backend ( in @c _base_reg )
+ * - MigrantTensor - @c IPortableTensor managed by other backends ( in @c _base_reg )
+ *
+ * @note @c _base_reg is used in the implementation to reuse @c cpu_common::StaticTensorManager
+ *
+ */
+class TensorRegistry : public ITensorRegistry
+{
+public:
+ TensorRegistry() : _base_reg{new cpu_common::TensorRegistry} {}
+
+ std::shared_ptr<ITensor> getITensor(const ir::OperandIndex &ind) override
+ {
+ auto base_tensor = _base_reg->getITensor(ind);
+ if (base_tensor)
+ return base_tensor;
+ return getNativeUserTensor(ind);
+ }
+
+ std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &ind) override
+ {
+ auto base_tensor = _base_reg->getNativeITensor(ind);
+ if (base_tensor)
+ return base_tensor;
+ return getNativeUserTensor(ind);
+ }
+
+ std::shared_ptr<IPortableTensor> getPortableTensor(const ir::OperandIndex &ind)
+ {
+ auto base_tensor = _base_reg->getPortableTensor(ind);
+ if (base_tensor)
+ return base_tensor;
+ return getNativeUserTensor(ind);
+ }
+
+ std::shared_ptr<IPortableTensor> getNativeTensor(const ir::OperandIndex &ind)
+ {
+ auto base_tensor = _base_reg->getNativeTensor(ind);
+ if (base_tensor)
+ return base_tensor;
+ return getNativeUserTensor(ind);
+ }
+
+ std::shared_ptr<Tensor> getNativeOwnTensor(const ir::OperandIndex &ind)
+ {
+ return _base_reg->getNativeTensor(ind);
+ }
+
+ std::shared_ptr<UserTensor> getNativeUserTensor(const ir::OperandIndex &ind)
+ {
+ auto tensor = _native_user_tensors.find(ind);
+ if (tensor != _native_user_tensors.end())
+ return tensor->second;
+ return nullptr;
+ }
+
+ bool setMigrantTensor(const ir::OperandIndex &ind,
+ const std::shared_ptr<IPortableTensor> &tensor) override
+ {
+ assert(tensor);
+ assert(!getITensor(ind)); // For the ind, tensor is not registered yet
+ _base_reg->setMigrantTensor(ind, tensor);
+ return true;
+ }
+
+ void setNativeOwnTensor(ir::OperandIndex ind, const std::shared_ptr<Tensor> &tensor)
+ {
+ assert(tensor);
+ assert(!getITensor(ind)); // For the ind, tensor is not registered yet
+ _base_reg->setNativeTensor(ind, tensor);
+ }
+
+ void setNativeUserTensor(ir::OperandIndex ind, const std::shared_ptr<UserTensor> &tensor)
+ {
+ assert(tensor);
+ assert(!getITensor(ind)); // For the ind, tensor is not registered yet
+ _native_user_tensors[ind] = tensor;
+ }
+
+ const ir::OperandIndexMap<std::shared_ptr<UserTensor>> &native_user_tensors()
+ {
+ return _native_user_tensors;
+ }
+ std::shared_ptr<cpu_common::TensorRegistry> base_reg() { return _base_reg; }
+
+private:
+ std::shared_ptr<cpu_common::TensorRegistry> _base_reg;
+ ir::OperandIndexMap<std::shared_ptr<UserTensor>> _native_user_tensors;
+};
+
+} // namespace controlflow
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CONTROLFLOW_TENSOR_REGISTRY_H__
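A hedged usage sketch of the lookup order the new registry implements; the function and its includes are illustrative and not part of the patch.

    #include "TensorRegistry.h" // assumed local include path
    #include "ir/Index.h"

    // Resolution order: getITensor()/getNativeITensor() consult the base registry first
    // (own tensors and, for getITensor(), migrant tensors) and fall back to the user-tensor
    // map, so callers need not know which kind of tensor backs a given operand.
    void lookupExample(onert::backend::controlflow::TensorRegistry &reg,
                       const onert::ir::OperandIndex &ind)
    {
      auto any = reg.getITensor(ind);           // own, migrant, or user tensor
      auto native = reg.getNativeITensor(ind);  // own or user tensor
      auto own = reg.getNativeOwnTensor(ind);   // cpu_common::Tensor only, may be null
      auto user = reg.getNativeUserTensor(ind); // UserTensor only, may be null
      (void)any; (void)native; (void)own; (void)user;
    }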
diff --git a/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc b/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc
index 3c095b38c..e8f1ea679 100644
--- a/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc
+++ b/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc
@@ -55,7 +55,11 @@ void PermuteLayer::run()
try
{
const auto dst_index = _dst_dyn_alloc_info_map.at(dst_tensor).ind;
- _dst_dyn_alloc_info_map.at(dst_tensor).dyn_tensor_manager->applyShape(dst_index, new_shape);
+ auto dyn_tensor_manager = dst_tensor->dynamic_tensor_manager();
+ if (!dyn_tensor_manager)
+ throw std::runtime_error{
+ "Error: PermuteLayer: output's TensorManager does not support dynamic tensor"};
+ dyn_tensor_manager->applyShape(dst_index, new_shape);
assert(dst_tensor->buffer() != nullptr);
}
catch (const std::out_of_range &e)
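Since DynAllocInfo now carries only the operand index, the kernel obtains the dynamic tensor manager from the destination tensor itself. A hedged sketch of that pattern in isolation; the function name is illustrative and the backend/ir headers are assumed to be on the include path:

    #include <memory>
    #include <stdexcept>

    void applyDynamicShape(const std::shared_ptr<onert::backend::ITensor> &tensor,
                           const onert::ir::OperandIndex &ind, const onert::ir::Shape &shape)
    {
      auto mgr = tensor->dynamic_tensor_manager(); // null for backends without dynamic-tensor support
      if (!mgr)
        throw std::runtime_error{"tensor's backend does not support dynamic tensors"};
      mgr->applyShape(ind, shape); // reallocates the buffer if the new shape needs more space
    }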
diff --git a/runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc b/runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc
index cb27d757f..f7ce3d011 100644
--- a/runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc
+++ b/runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc
@@ -95,17 +95,7 @@ void DynamicTensorManager::buildTensor(const ir::OperandIndex &ind,
void DynamicTensorManager::planDealloc(ir::OperationIndex op_ind, ir::OperandIndex operand_ind)
{
- auto find = _dealloc_tensor_map.find(op_ind);
- if (find != _dealloc_tensor_map.end())
- {
- auto &input_set = find->second;
- input_set.emplace(operand_ind);
- }
- else
- {
- _dealloc_tensor_map.emplace(
- std::make_pair(op_ind, std::unordered_set<ir::OperandIndex>{operand_ind}));
- }
+ _dealloc_tensor_map[op_ind].emplace(operand_ind);
}
void DynamicTensorManager::deallocInput(ir::OperationIndex op_ind)
diff --git a/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc b/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc
index 820cad38a..440f70c93 100644
--- a/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc
+++ b/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc
@@ -26,8 +26,10 @@ namespace backend
namespace cpu_common
{
-StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg)
- : _const_mgr{new DynamicMemoryManager()}, _nonconst_mgr{new MemoryManager()}, _tensors{reg}
+StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
+ IDynamicTensorManager *dynamic_tensor_manager)
+ : _const_mgr{new DynamicMemoryManager()}, _nonconst_mgr{new MemoryManager()}, _tensors{reg},
+ _dynamic_tensor_manager{dynamic_tensor_manager}
{
// DO NOTHING
}
@@ -78,7 +80,7 @@ void StaticTensorManager::buildTensor(const ir::OperandIndex &ind,
bool as_const)
{
assert(!_tensors->getNativeTensor(ind));
- auto tensor = std::make_shared<Tensor>(tensor_info, backend_layout, nullptr);
+ auto tensor = std::make_shared<Tensor>(tensor_info, backend_layout, _dynamic_tensor_manager);
_tensors->setNativeTensor(ind, tensor);
_as_constants[ind] = as_const;
}
diff --git a/runtime/onert/core/src/compiler/Compiler.cc b/runtime/onert/core/src/compiler/Compiler.cc
index 33b428a4b..93dbbc3b5 100644
--- a/runtime/onert/core/src/compiler/Compiler.cc
+++ b/runtime/onert/core/src/compiler/Compiler.cc
@@ -134,6 +134,12 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void)
backend::controlflow::Config::ID;
}
+  // FIXME This is a workaround for BCQ operations and should be removed
+ {
+ _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq";
+ _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq";
+ }
+
{
VERBOSE(Compiler) << std::boolalpha;
VERBOSE(Compiler) << "==== Compiler Options ====" << std::endl;
@@ -181,14 +187,14 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void)
auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_options.graph_dump_level);
// Lower: Assign backend
- std::unordered_map<ir::SubgraphIndex, std::unique_ptr<ir::LoweredGraph>> lowered_subgs;
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>> lowered_subgs;
_subgraphs->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
_options.is_primary_subgraph = (index == ir::SubgraphIndex{0});
onert::dumper::dot::DotDumper dot_dumper(subg, dump_level);
dot_dumper.dump(nnfw::misc::str("before_lower_subg-", index.value()));
// Lower: Assign backend
- lowered_subgs[index] = std::make_unique<ir::LoweredGraph>(subg, _options);
+ lowered_subgs[index] = std::make_unique<compiler::LoweredGraph>(subg, _options);
// Check backend(s) for subgraph support FP16
bool backends_support_fp16 = true;
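The same manual-scheduler hook used for the BCQ workaround can pin any opcode to a backend. A hedged sketch; the choice of Conv2D and the "cpu" backend id are illustrative, not part of this change:

    // Illustrative only: route all Conv2D nodes to the cpu backend through the manual
    // scheduler options, the same mechanism the BCQ workaround above uses.
    void pinConv2DToCpu(onert::compiler::CompilerOptions &options)
    {
      options.manual_scheduler_options.opcode_to_backend[onert::ir::OpCode::Conv2D] = "cpu";
    }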
diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.cc b/runtime/onert/core/src/compiler/ExecutorFactory.cc
index 82afd9e56..062c6c9c3 100644
--- a/runtime/onert/core/src/compiler/ExecutorFactory.cc
+++ b/runtime/onert/core/src/compiler/ExecutorFactory.cc
@@ -25,6 +25,7 @@
#include "compiler/ExecutionBuilder.h"
#include "exec/ExecTime.h"
#include "compiler/Linear.h"
+#include "compiler/TensorBuilders.h"
#include "backend/IConstantInitializer.h"
#include "backend/IKernelGenerator.h"
#include "backend/IOptimizer.h"
@@ -64,6 +65,23 @@ private:
std::shared_ptr<backend::IConfig> _config;
};
+// TODO Think of a better way to manage TensorManagers
+backend::TensorManagerSet createTensorManagerSet(const compiler::TensorBuilders &tensor_builders)
+{
+ backend::TensorManagerSet tensor_mgrs;
+ for (auto &tensor_builder : tensor_builders)
+ {
+ auto s_tensor_manager = tensor_builder->releaseStaticTensorManager();
+ if (s_tensor_manager != nullptr)
+ tensor_mgrs.insert(std::move(s_tensor_manager));
+
+ auto d_tensor_manager = tensor_builder->releaseDynamicTensorManager();
+ if (d_tensor_manager != nullptr)
+ tensor_mgrs.insert(std::move(d_tensor_manager));
+ }
+ return tensor_mgrs;
+}
+
} // namespace
} // namespace onert
@@ -87,14 +105,14 @@ ExecutorFactory::ExecutorFactory()
std::placeholders::_3, true);
}
-exec::IExecutor *ExecutorFactory::create(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+exec::IExecutor *ExecutorFactory::create(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const compiler::CompilerOptions &options,
const std::shared_ptr<exec::ExecutorMap> &executor_map)
{
return _map.at(options.executor)(std::move(lowered_graph), options, executor_map);
}
-void ExecutorFactory::initializeBackendContext(ir::LoweredGraph *lowered_graph)
+void ExecutorFactory::initializeBackendContext(compiler::LoweredGraph *lowered_graph)
{
struct Entry
{
@@ -132,7 +150,7 @@ void ExecutorFactory::initializeBackendContext(ir::LoweredGraph *lowered_graph)
}
}
-void ExecutorFactory::runTensorRegistration(ir::LoweredGraph *lowered_graph,
+void ExecutorFactory::runTensorRegistration(compiler::LoweredGraph *lowered_graph,
const std::vector<ir::OpSequenceIndex> &order)
{
for (const auto index : order)
@@ -141,6 +159,8 @@ void ExecutorFactory::runTensorRegistration(ir::LoweredGraph *lowered_graph,
const auto backend = lowered_graph->getLowerInfo(index)->backend();
const auto tensor_register = lowered_graph->backend_contexts().at(backend)->tensor_register;
auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder;
+ auto model_io = lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs();
+
if (tensor_register)
{
// Custom registration
@@ -154,7 +174,7 @@ void ExecutorFactory::runTensorRegistration(ir::LoweredGraph *lowered_graph,
const auto &op = lowered_graph->graph().operations().at(op_idx);
for (const auto &index : (op.getInputs() | ir::Remove::UNDEFINED) + op.getOutputs())
{
- if (!tensor_builder->isRegistered(index))
+ if (!tensor_builder->isRegistered(index) && !model_io.contains(index))
{
const auto &operand_lower_info =
lowered_graph->getLowerInfo(index)->def_factors().getOnlyElement();
@@ -181,15 +201,28 @@ void ExecutorFactory::runTensorRegistration(ir::LoweredGraph *lowered_graph,
}
std::vector<std::shared_ptr<backend::ITensor>>
-ExecutorFactory::initializeModelIOTensors(ir::LoweredGraph &lowered_graph,
+ExecutorFactory::initializeModelIOTensors(compiler::LoweredGraph &lowered_graph,
const ir::OperandIndexSequence &indices)
{
std::vector<std::shared_ptr<backend::ITensor>> ret;
- TensorBuilders tensor_builders{lowered_graph.backend_contexts(), false};
- std::shared_ptr<backend::controlflow::TensorBuilder> cf_tensor_builder =
- tensor_builders.getControlflowTensorBuilder();
+ // TODO Store controlflow backend in BackendContext
+ std::shared_ptr<backend::controlflow::TensorBuilder> cf_tensor_builder;
+ std::shared_ptr<backend::controlflow::TensorRegistry> cf_tensor_reg;
+ for (const auto &e : lowered_graph.backend_contexts())
+ {
+ auto backend = e.first;
+ auto &context = e.second;
+ if (backend->config()->id() == backend::controlflow::Config::ID)
+ {
+ cf_tensor_builder =
+ std::dynamic_pointer_cast<backend::controlflow::TensorBuilder>(context->tensor_builder);
+ cf_tensor_reg =
+ std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(context->tensor_registry);
+ }
+ }
assert(cf_tensor_builder);
+ assert(cf_tensor_reg);
for (auto ind : indices)
{
@@ -200,15 +233,16 @@ ExecutorFactory::initializeModelIOTensors(ir::LoweredGraph &lowered_graph,
cf_tensor_builder->dynamicTensorManager());
// Add tensor to controlflow TensorRegistry.
- cf_tensor_builder->setUserTensor(ind, tensor);
+ cf_tensor_reg->setNativeUserTensor(ind, tensor);
ret.push_back(tensor);
}
return ret;
}
-void ExecutorFactory::prepareExternalTensors(ir::LoweredGraph &lowered_graph,
- TensorBuilders &tensor_builders)
+void ExecutorFactory::prepareExternalTensors(compiler::LoweredGraph &lowered_graph)
{
+ TensorRegistries tensor_regs{lowered_graph.backend_contexts(), true};
+
lowered_graph.op_seqs().iterate(
[&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) {
auto lower_info = lowered_graph.getLowerInfo(op_seq_index);
@@ -219,20 +253,20 @@ void ExecutorFactory::prepareExternalTensors(ir::LoweredGraph &lowered_graph,
// If an OpSequence input/output tensor does not have a own tensor object,
// it must be using external tensors, so find the tensor from other tensor builders and
// set the tensor to this tensor builder if portable
- if (!backend_ctx->tensor_builder->tensorAt(ind))
+ if (!backend_ctx->tensor_registry->getITensor(ind))
{
- auto tensor = tensor_builders.getITensor(ind);
- assert(tensor); // The tensor must have been created in one of TensorBuilders
+ auto tensor = tensor_regs.getITensor(ind);
+ assert(tensor); // The tensor must have been registered
auto ptensor = std::dynamic_pointer_cast<backend::IPortableTensor>(tensor);
if (ptensor)
- backend_ctx->tensor_builder->setMigrantTensor(ind, ptensor);
+ backend_ctx->tensor_registry->setMigrantTensor(ind, ptensor);
}
}
});
}
exec::IExecutor *
-ExecutorFactory::createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const compiler::CompilerOptions &options,
const std::shared_ptr<exec::ExecutorMap> &executor_map)
{
@@ -277,13 +311,14 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_
Linear::planTensors(*lowered_graph, order);
TensorBuilders tensor_builders{lowered_graph->backend_contexts(), true};
+ TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};
for (auto &tensor_builder : tensor_builders)
{
tensor_builder->prepare();
}
- prepareExternalTensors(*lowered_graph, tensor_builders);
+ prepareExternalTensors(*lowered_graph);
ExecutionBuilder builder;
@@ -296,7 +331,7 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_
auto cf_kernel_gen = dynamic_cast<backend::controlflow::KernelGenerator *>(kernel_gen.get());
if (cf_kernel_gen != nullptr)
{
- cf_kernel_gen->setTensorBuilderSet(tensor_builders);
+ cf_kernel_gen->setTensorRegistries(tensor_regs);
cf_kernel_gen->setExecutorMap(executor_map);
}
auto fn_seq = kernel_gen->generate(op_seq);
@@ -335,9 +370,10 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_
});
}
- auto exec =
- new exec::LinearExecutor{std::move(lowered_graph), input_tensors, output_tensors,
- tensor_builders, std::move(code_map), order};
+ backend::TensorManagerSet tensor_mgrs = createTensorManagerSet(tensor_builders);
+ auto exec = new exec::LinearExecutor{
+ std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
+ std::move(tensor_mgrs), std::move(code_map), order};
if (!options.trace_filepath.empty())
{
@@ -350,7 +386,7 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_
}
exec::IExecutor *ExecutorFactory::createDataflowExecutor(
- std::unique_ptr<ir::LoweredGraph> lowered_graph, const compiler::CompilerOptions &options,
+ std::unique_ptr<compiler::LoweredGraph> lowered_graph, const compiler::CompilerOptions &options,
const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel)
{
const auto &backend_contexts = lowered_graph->backend_contexts();
@@ -369,6 +405,7 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
}
TensorBuilders tensor_builders{lowered_graph->backend_contexts(), true};
+ TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};
// To make tensors never be deallocated, this is a workaround to use static memory planner
for (auto &tensor_builder : tensor_builders)
@@ -387,7 +424,7 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
tensor_builder->prepare();
}
- prepareExternalTensors(*lowered_graph, tensor_builders);
+ prepareExternalTensors(*lowered_graph);
ExecutionBuilder builder;
@@ -401,7 +438,7 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
if (cf_kernel_gen != nullptr)
{
assert(cf_kernel_gen != nullptr);
- cf_kernel_gen->setTensorBuilderSet(tensor_builders);
+ cf_kernel_gen->setTensorRegistries(tensor_regs);
cf_kernel_gen->setExecutorMap(executor_map);
}
auto fn_seq = kernel_gen->generate(op_seq);
@@ -440,17 +477,20 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
});
}
+ backend::TensorManagerSet tensor_mgrs = createTensorManagerSet(tensor_builders);
+
exec::ExecutorBase *exec = nullptr;
if (parallel)
{
- exec = new exec::ParallelExecutor{std::move(lowered_graph), input_tensors, output_tensors,
- tensor_builders, std::move(code_map)};
+ exec = new exec::ParallelExecutor{std::move(lowered_graph), input_tensors,
+ output_tensors, tensor_regs,
+ std::move(tensor_mgrs), std::move(code_map)};
}
else
{
- auto dataflow_exec =
- new exec::DataflowExecutor{std::move(lowered_graph), input_tensors, output_tensors,
- tensor_builders, std::move(code_map)};
+ auto dataflow_exec = new exec::DataflowExecutor{std::move(lowered_graph), input_tensors,
+ output_tensors, tensor_regs,
+ std::move(tensor_mgrs), std::move(code_map)};
if (options.he_profiling_mode)
{
std::vector<const backend::Backend *> backends;
diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.h b/runtime/onert/core/src/compiler/ExecutorFactory.h
index 418e5a764..b8893c03b 100644
--- a/runtime/onert/core/src/compiler/ExecutorFactory.h
+++ b/runtime/onert/core/src/compiler/ExecutorFactory.h
@@ -21,8 +21,8 @@
#include "backend/ITensor.h"
#include "exec/IExecutor.h"
-#include "ir/LoweredGraph.h"
-#include "TensorBuilders.h"
+#include "compiler/LoweredGraph.h"
+#include "TensorRegistries.h"
namespace onert
{
@@ -35,7 +35,7 @@ public:
static ExecutorFactory &get();
public:
- exec::IExecutor *create(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+ exec::IExecutor *create(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const compiler::CompilerOptions &options,
const std::shared_ptr<exec::ExecutorMap> &executor_map);
@@ -43,28 +43,27 @@ private:
ExecutorFactory();
private:
- static void initializeBackendContext(ir::LoweredGraph *lowered_graph);
- static void runTensorRegistration(ir::LoweredGraph *lowered_graph,
+ static void initializeBackendContext(compiler::LoweredGraph *lowered_graph);
+ static void runTensorRegistration(compiler::LoweredGraph *lowered_graph,
const std::vector<ir::OpSequenceIndex> &order);
static std::vector<std::shared_ptr<backend::ITensor>>
- initializeModelIOTensors(ir::LoweredGraph &lowered_graph,
+ initializeModelIOTensors(compiler::LoweredGraph &lowered_graph,
const ir::OperandIndexSequence &indices);
- static void prepareExternalTensors(ir::LoweredGraph &lowered_graph,
- TensorBuilders &tensor_builders);
+ static void prepareExternalTensors(compiler::LoweredGraph &lowered_graph);
static exec::IExecutor *
- createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+ createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const compiler::CompilerOptions &options,
const std::shared_ptr<exec::ExecutorMap> &executor_map);
static exec::IExecutor *
- createDataflowExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+ createDataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const compiler::CompilerOptions &options,
const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel);
private:
- std::unordered_map<
- std::string, std::function<exec::IExecutor *(
- std::unique_ptr<ir::LoweredGraph>, const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map)>>
+ std::unordered_map<std::string, std::function<exec::IExecutor *(
+ std::unique_ptr<compiler::LoweredGraph>,
+ const compiler::CompilerOptions &options,
+ const std::shared_ptr<exec::ExecutorMap> &executor_map)>>
_map;
};
diff --git a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc
index 5c4b84ec0..23a6a253d 100644
--- a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc
+++ b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc
@@ -44,7 +44,7 @@ namespace onert
namespace compiler
{
-Fp32ToFp16Converter::Fp32ToFp16Converter(ir::LoweredGraph &lowered_graph)
+Fp32ToFp16Converter::Fp32ToFp16Converter(compiler::LoweredGraph &lowered_graph)
: _lowered_graph{lowered_graph}
{
VERBOSE(Fp32ToFp16Converter) << "Fp16 Enable on" << std::endl;
diff --git a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.h b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.h
index 5dbf74472..eeecb9846 100644
--- a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.h
+++ b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.h
@@ -17,7 +17,7 @@
#ifndef __ONERT_COMPILER_FP32_TO_FP16_CONVERTER_H__
#define __ONERT_COMPILER_FP32_TO_FP16_CONVERTER_H__
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
namespace onert
{
@@ -28,7 +28,7 @@ namespace compiler
class Fp32ToFp16Converter
{
public:
- Fp32ToFp16Converter(ir::LoweredGraph &lowered_graph);
+ Fp32ToFp16Converter(compiler::LoweredGraph &lowered_graph);
public:
void run();
@@ -89,7 +89,7 @@ private:
void convertOperandsOfOpSequence(ir::OpSequence &op_seq);
private:
- ir::LoweredGraph &_lowered_graph;
+ compiler::LoweredGraph &_lowered_graph;
OpSeqIndexList _list_fp32_to_fp16;
OpSeqIndexList _list_fp16_to_fp32;
};
diff --git a/runtime/onert/core/src/compiler/HEScheduler.cc b/runtime/onert/core/src/compiler/HEScheduler.cc
index de9b4fbd0..5653b090e 100644
--- a/runtime/onert/core/src/compiler/HEScheduler.cc
+++ b/runtime/onert/core/src/compiler/HEScheduler.cc
@@ -54,42 +54,10 @@ static bool isQuant(const ir::Graph &graph, const ir::Operation &node)
return false;
}
-static bool isWorkaroundSkip(const ir::Graph &graph, const backend::Backend *backend,
- const ir::Operation &node, bool quant)
+static bool isWorkaroundSkip(const ir::Graph &, const backend::Backend *, const ir::Operation &,
+ bool)
{
- /* TODO: this is workaround, come up with better solution if have.
- Adding exception in stage doesn't help. Because if there is a record for add without
- broadcast, scheduling will select it since it doesn't distinguish broadcast and
- non-broadcast like it does for quant non-quantized*/
- if (backend->config()->id() == "cpu" &&
- (node.opcode() == ir::OpCode::Add || node.opcode() == ir::OpCode::Sub ||
- node.opcode() == ir::OpCode::Mul))
- {
- const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
- /*Broadcasting isn't supported on CPU: no way to differ the existing exec_time record with and
- * without broadcasting*/
- if (!(graph.operands().at(lhs_index).shape() == graph.operands().at(rhs_index).shape()))
- {
- return true;
- }
- }
- /* TODO: this is workaround, come up with better solution if have.
- Adding exception in stage doesn't help. Because if there is a record for Mul without
- broadcast, scheduling will select it since it doesn't distinguish broadcast and
- non-broadcast like it does for quant non-quantized*/
- else if (backend->config()->id() == "acl_neon" && node.opcode() == ir::OpCode::Mul)
- {
- const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
-
- // Nontrivial broadcasting isn't supported yet
- if (quant ||
- !(graph.operands().at(lhs_index).shape() == graph.operands().at(rhs_index).shape()))
- {
- return true;
- }
- }
+  // Currently there is no workaround needed
return false;
}
diff --git a/runtime/onert/core/src/compiler/HEScheduler.h b/runtime/onert/core/src/compiler/HEScheduler.h
index d8ceca9c8..b9cee5881 100644
--- a/runtime/onert/core/src/compiler/HEScheduler.h
+++ b/runtime/onert/core/src/compiler/HEScheduler.h
@@ -59,6 +59,8 @@ public:
{
for (auto &entry : backend_contexts)
{
+ if (entry.first->config()->id() == backend::controlflow::Config::ID)
+ continue;
_all_backends.push_back(entry.first);
}
_backend_resolver = std::make_unique<compiler::BackendResolver>();
diff --git a/runtime/onert/core/src/compiler/Linear.cc b/runtime/onert/core/src/compiler/Linear.cc
index 493ca1e43..49a989500 100644
--- a/runtime/onert/core/src/compiler/Linear.cc
+++ b/runtime/onert/core/src/compiler/Linear.cc
@@ -29,7 +29,7 @@ namespace onert
namespace compiler
{
-std::vector<ir::OpSequenceIndex> Linear::linearize(const ir::LoweredGraph &lowered_graph)
+std::vector<ir::OpSequenceIndex> Linear::linearize(const compiler::LoweredGraph &lowered_graph)
{
std::vector<ir::OpSequenceIndex> order;
lowered_graph.iterateTopolOpSeqs(
@@ -39,7 +39,7 @@ std::vector<ir::OpSequenceIndex> Linear::linearize(const ir::LoweredGraph &lower
return order;
}
-void Linear::dump(const ir::LoweredGraph &lowered_graph,
+void Linear::dump(const compiler::LoweredGraph &lowered_graph,
const std::vector<ir::OpSequenceIndex> &order)
{
{
@@ -62,7 +62,7 @@ void Linear::dump(const ir::LoweredGraph &lowered_graph,
}
}
-void Linear::planTensors(const ir::LoweredGraph &lowered_graph,
+void Linear::planTensors(const compiler::LoweredGraph &lowered_graph,
const std::vector<ir::OpSequenceIndex> &order)
{
const auto &graph = lowered_graph.graph();
@@ -180,11 +180,9 @@ void Linear::planTensors(const ir::LoweredGraph &lowered_graph,
tensor_builder_map[ind]->notifyLastUse(ind);
// plan for deallocation of dynamic tensor
- if (tensor_builder_map[ind]->supportDynamicTensor())
- {
- assert(tensor_builder_map[ind]->dynamicTensorManager());
- tensor_builder_map[ind]->dynamicTensorManager()->planDealloc(op_idx, ind);
- }
+ auto dyn_tensor_manager = tensor_builder_map[ind]->dynamicTensorManager();
+ if (dyn_tensor_manager)
+ dyn_tensor_manager->planDealloc(op_idx, ind);
}
}
}
diff --git a/runtime/onert/core/src/compiler/Linear.h b/runtime/onert/core/src/compiler/Linear.h
index faeff77f3..1e24cf92b 100644
--- a/runtime/onert/core/src/compiler/Linear.h
+++ b/runtime/onert/core/src/compiler/Linear.h
@@ -23,7 +23,7 @@
#include "ir/OpSequences.h"
#include "ir/Index.h"
#include "backend/ITensorBuilder.h"
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
namespace onert
{
@@ -41,10 +41,10 @@ namespace compiler
class Linear
{
public:
- static std::vector<ir::OpSequenceIndex> linearize(const ir::LoweredGraph &lowered_graph);
- static void dump(const ir::LoweredGraph &lowered_graph,
+ static std::vector<ir::OpSequenceIndex> linearize(const compiler::LoweredGraph &lowered_graph);
+ static void dump(const compiler::LoweredGraph &lowered_graph,
const std::vector<ir::OpSequenceIndex> &order);
- static void planTensors(const ir::LoweredGraph &lowered_graph,
+ static void planTensors(const compiler::LoweredGraph &lowered_graph,
const std::vector<ir::OpSequenceIndex> &order);
};
diff --git a/runtime/onert/core/src/ir/LoweredGraph.cc b/runtime/onert/core/src/compiler/LoweredGraph.cc
index 8aedfbdf0..1489a1884 100644
--- a/runtime/onert/core/src/ir/LoweredGraph.cc
+++ b/runtime/onert/core/src/compiler/LoweredGraph.cc
@@ -14,18 +14,18 @@
* limitations under the License.
*/
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
#include <assert.h>
#include <sstream>
#include "util/logging.h"
-#include "pass/ConstantInsertionPass.h"
-#include "pass/ConstantLoweringPass.h"
-#include "pass/PermutationOperationPass.h"
-#include "pass/PermutationInsertionPass.h"
-#include "pass/PermutationEliminationPass.h"
+#include "compiler/pass/ConstantInsertionPass.h"
+#include "compiler/pass/ConstantLoweringPass.h"
+#include "compiler/pass/PermutationOperationPass.h"
+#include "compiler/pass/PermutationInsertionPass.h"
+#include "compiler/pass/PermutationEliminationPass.h"
#include "ir/GraphIterator.h"
-#include "verifier/Verifier.h"
+#include "ir/verifier/Verifier.h"
#include "backend/Backend.h"
#include "backend/IConfig.h"
#include "compiler/BackendResolver.h"
@@ -34,16 +34,15 @@
namespace onert
{
-namespace ir
+namespace compiler
{
-LoweredGraph::LoweredGraph(const Graph &graph, const compiler::CompilerOptions &options)
- : _graph{graph}
+LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &options) : _graph{graph}
{
bool linear_executor = (options.executor == "Linear");
// Build backend contexts
- auto &backend_manager = compiler::BackendManager::get();
+ auto &backend_manager = BackendManager::get();
// Always create Controlflow backend context
auto cf_backend = backend_manager.getControlflow();
@@ -73,36 +72,37 @@ LoweredGraph::LoweredGraph(const Graph &graph, const compiler::CompilerOptions &
// TODO Move "schedule" phase out of here
// Schedule
- std::unique_ptr<compiler::BackendResolver> backend_resolver;
+ std::unique_ptr<BackendResolver> backend_resolver;
if (options.he_scheduler)
{
- auto scheduler = compiler::HEScheduler(_backend_contexts, options);
+ auto scheduler = HEScheduler(_backend_contexts, options);
backend_resolver = scheduler.schedule(_graph);
_indexed_ranks = scheduler.getIndexedRanks();
}
else
{
- auto scheduler = compiler::ManualScheduler(_backend_contexts, options);
+ auto scheduler = ManualScheduler(_backend_contexts, options);
backend_resolver = scheduler.schedule(_graph);
}
{
// operand::LowerInfo holder
- OperandIndexMap<std::unique_ptr<operand::LowerInfo>> operands_lower_info;
+ ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> operands_lower_info;
- _graph.operands().iterate([&](const OperandIndex &index, const Operand &) {
- operands_lower_info[index] = std::make_unique<operand::LowerInfo>();
+ _graph.operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &) {
+ operands_lower_info[index] = std::make_unique<ir::operand::LowerInfo>();
});
// Make op_seqs while checking whether a node can be merged into a op_seq.
makeOpSequences(operands_lower_info, options, *backend_resolver);
- _op_seqs.iterate([&](const OpSequenceIndex &, OpSequence &op_seq) {
+ _op_seqs.iterate([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
assert(op_seq.operations().size() > 0);
std::reverse(std::begin(op_seq.operations()), std::end(op_seq.operations()));
});
- _op_seqs.dump("merged and sorted operations without permutation", _graph.operations());
+ VERBOSE(OpSequences) << "dump without permutation" << std::endl;
+ dumpOpSequences(_op_seqs, _graph.operations());
pass::ConstantInsertionPass ci_pass(*this);
ci_pass.run();
@@ -127,17 +127,19 @@ LoweredGraph::LoweredGraph(const Graph &graph, const compiler::CompilerOptions &
pass::PermutationEliminationPass pe_pass(*this);
pe_pass.run();
- _op_seqs.dump("merged and sorted operations with permutation", _graph.operations());
+ VERBOSE(OpSequences) << "dump with permutation" << std::endl;
+ dumpOpSequences(_op_seqs, _graph.operations());
}
// Graph verifications
{
- assert(verifier::DAGChecker().verify(_graph));
- assert(verifier::EdgeConsistencyChecker().verify(_graph));
+ assert(ir::verifier::DAGChecker().verify(_graph));
+ assert(ir::verifier::EdgeConsistencyChecker().verify(_graph));
}
}
-const operation::LowerInfo *LoweredGraph::getLowerInfo(const OpSequenceIndex &op_seq_index) const
+const ir::operation::LowerInfo *
+LoweredGraph::getLowerInfo(const ir::OpSequenceIndex &op_seq_index) const
{
auto itr = _lower_info_map.op_seq.find(op_seq_index);
if (itr == _lower_info_map.op_seq.end())
@@ -145,13 +147,13 @@ const operation::LowerInfo *LoweredGraph::getLowerInfo(const OpSequenceIndex &op
return itr->second.get();
}
-void LoweredGraph::setLowerInfo(const OpSequenceIndex &op_seq_index,
- std::unique_ptr<operation::LowerInfo> &&lower_info)
+void LoweredGraph::setLowerInfo(const ir::OpSequenceIndex &op_seq_index,
+ std::unique_ptr<ir::operation::LowerInfo> &&lower_info)
{
_lower_info_map.op_seq.insert(std::make_pair(op_seq_index, std::move(lower_info)));
}
-void LoweredGraph::removeLowerInfo(const OpSequenceIndex &op_seq_index)
+void LoweredGraph::removeLowerInfo(const ir::OpSequenceIndex &op_seq_index)
{
auto &op_seq_lower_info = _lower_info_map.op_seq;
assert(op_seq_lower_info.find(op_seq_index) != op_seq_lower_info.end());
@@ -165,7 +167,7 @@ void LoweredGraph::removeLowerInfo(const OpSequenceIndex &op_seq_index)
}
}
-const operand::LowerInfo *LoweredGraph::getLowerInfo(const OperandIndex &index) const
+const ir::operand::LowerInfo *LoweredGraph::getLowerInfo(const ir::OperandIndex &index) const
{
auto itr = _lower_info_map.operand.find(index);
if (itr == _lower_info_map.operand.end())
@@ -173,7 +175,7 @@ const operand::LowerInfo *LoweredGraph::getLowerInfo(const OperandIndex &index)
return itr->second.get();
}
-operand::LowerInfo *LoweredGraph::getLowerInfo(const OperandIndex &index)
+ir::operand::LowerInfo *LoweredGraph::getLowerInfo(const ir::OperandIndex &index)
{
auto itr = _lower_info_map.operand.find(index);
if (itr == _lower_info_map.operand.end())
@@ -181,25 +183,26 @@ operand::LowerInfo *LoweredGraph::getLowerInfo(const OperandIndex &index)
return itr->second.get();
}
-void LoweredGraph::setLowerInfo(const OperandIndex &index,
- std::unique_ptr<operand::LowerInfo> &&lower_info)
+void LoweredGraph::setLowerInfo(const ir::OperandIndex &index,
+ std::unique_ptr<ir::operand::LowerInfo> &&lower_info)
{
_lower_info_map.operand.insert(std::make_pair(index, std::move(lower_info)));
}
-void LoweredGraph::removeLowerInfo(const OperandIndex &index)
+void LoweredGraph::removeLowerInfo(const ir::OperandIndex &index)
{
_lower_info_map.operand.erase(index);
}
void LoweredGraph::iterateTopolOpSeqs(
- const std::function<void(const OpSequenceIndex &, const OpSequence &)> &fn) const
+ const std::function<void(const ir::OpSequenceIndex &, const ir::OpSequence &)> &fn) const
{
- // Topological Sorting for OpSequences
- std::vector<OpSequenceIndex> topol_sorted;
- PostDfsIterator<true>{}.iterateOpSeqs(
- *this,
- [&](const OpSequenceIndex &index, const OpSequence &) { topol_sorted.emplace_back(index); });
+ // Topological Sorting for ir::OpSequences
+ std::vector<ir::OpSequenceIndex> topol_sorted;
+ ir::PostDfsIterator<true>{}.iterateOpSeqs(
+ *this, [&](const ir::OpSequenceIndex &index, const ir::OpSequence &) {
+ topol_sorted.emplace_back(index);
+ });
std::reverse(topol_sorted.begin(), topol_sorted.end());
for (const auto op_seq_idx : topol_sorted)
{
@@ -209,12 +212,14 @@ void LoweredGraph::iterateTopolOpSeqs(
}
void LoweredGraph::iterateTopolOpSeqs(
- const std::function<void(const OpSequenceIndex &, OpSequence &)> &fn)
+ const std::function<void(const ir::OpSequenceIndex &, ir::OpSequence &)> &fn)
{
- // Topological Sorting for OpSequences
- std::vector<OpSequenceIndex> topol_sorted;
- PostDfsIterator<false>{}.iterateOpSeqs(
- *this, [&](const OpSequenceIndex &index, OpSequence &) { topol_sorted.emplace_back(index); });
+ // Topological Sorting for ir::OpSequences
+ std::vector<ir::OpSequenceIndex> topol_sorted;
+ ir::PostDfsIterator<false>{}.iterateOpSeqs(
+ *this, [&](const ir::OpSequenceIndex &index, ir::OpSequence &) {
+ topol_sorted.emplace_back(index);
+ });
std::reverse(topol_sorted.begin(), topol_sorted.end());
for (const auto op_seq_idx : topol_sorted)
{
@@ -223,12 +228,12 @@ void LoweredGraph::iterateTopolOpSeqs(
}
}
-OpSequenceIndex LoweredGraph::appendFreshSingleOpSequence(const OperationIndex &node_index,
- const Operation &node)
+ir::OpSequenceIndex LoweredGraph::appendFreshSingleOpSequence(const ir::OperationIndex &node_index,
+ const ir::Operation &node)
{
// Create a fresh op_seq with one operation, and append it to op_seqs
// Create a fresh op_seq
- auto op_seq = std::make_unique<OpSequence>(_graph.layout());
+ auto op_seq = std::make_unique<ir::OpSequence>(_graph.layout());
// Add an operation
op_seq->appendOperation(node_index);
@@ -241,21 +246,21 @@ OpSequenceIndex LoweredGraph::appendFreshSingleOpSequence(const OperationIndex &
}
void LoweredGraph::makeOpSequences(
- OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info,
- const compiler::CompilerOptions &options, const compiler::BackendResolver &backend_resolver)
+ ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
+ const CompilerOptions &options, const BackendResolver &backend_resolver)
{
  // if SUBG_MAX_NODE == 0, there is no limit on the number of nodes in an op_seq
const int op_seq_max_node = options.op_seq_max_node;
assert(op_seq_max_node >= 0);
bool is_profiling = options.he_profiling_mode;
- OpSequence *op_seq = nullptr;
- OpSequenceIndex op_seq_index;
+ ir::OpSequence *op_seq = nullptr;
+ ir::OpSequenceIndex op_seq_index;
  // NOTE: The method below appends nodes, creating a new op_seq when needed. If there is a
  // better way, feel free to update this code.
- PostDfsConstIterator{}.iterate(
- _graph, [&](const OperationIndex &node_index, const Operation &node) {
+ ir::PostDfsConstIterator{}.iterate(
+ _graph, [&](const ir::OperationIndex &node_index, const ir::Operation &node) {
        // LowerInfo for input/output operands
auto backend = backend_resolver.getBackend(node_index);
@@ -269,12 +274,12 @@ void LoweredGraph::makeOpSequences(
for (auto operand : node.getInputs() | ir::Remove::UNDEFINED)
{
auto &&lower_info = operands_lower_info.at(operand);
- lower_info->addUsePermuteFactor(operand::PermuteFactor{backend, backend_layout});
+ lower_info->addUsePermuteFactor(ir::operand::PermuteFactor{backend, backend_layout});
}
for (auto operand : node.getOutputs())
{
auto &&lower_info = operands_lower_info.at(operand);
- lower_info->addDefPermuteFactor(operand::PermuteFactor{backend, backend_layout});
+ lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{backend, backend_layout});
}
bool new_op_seq = (op_seq == nullptr ||
@@ -288,9 +293,9 @@ void LoweredGraph::makeOpSequences(
{
auto new_op_seq_index = appendFreshSingleOpSequence(node_index, node);
- // OpSequence LowerInfo
+ // ir::OpSequence LowerInfo
setLowerInfo(new_op_seq_index,
- std::make_unique<operation::LowerInfo>(backend, backend_layout));
+ std::make_unique<ir::operation::LowerInfo>(backend, backend_layout));
op_seq_index = new_op_seq_index;
op_seq = &(_op_seqs.at(new_op_seq_index));
@@ -318,16 +323,17 @@ void LoweredGraph::makeOpSequences(
}
void LoweredGraph::manipulateLowerInfo(
- OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info, bool is_primary)
+ ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
+ bool is_primary)
{
- const auto controlflow_backend = compiler::BackendManager::get().getControlflow();
+ const auto controlflow_backend = BackendManager::get().getControlflow();
  // TODO Rather than handling the primary graph specially,
  //      let the permute be inserted and remove it later
if (is_primary)
{
// TODO Rather than using NHWC Get frontend layout of this node from IR
- auto factor = operand::PermuteFactor{controlflow_backend, Layout::NHWC};
+ auto factor = ir::operand::PermuteFactor{controlflow_backend, ir::Layout::NHWC};
for (auto index : _graph.getInputs() | ir::Remove::UNDEFINED)
{
auto &&lower_info = operands_lower_info.at(index);
@@ -355,9 +361,9 @@ void LoweredGraph::manipulateLowerInfo(
else
{
      // In case an operand is the graph's input but not an input or output of any operation
- lower_info->addDefPermuteFactor(operand::PermuteFactor{
+ lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{
controlflow_backend,
- Layout::NHWC // TODO Get frontend layout of this node from IR
+ ir::Layout::NHWC // TODO Get frontend layout of this node from IR
});
}
}
@@ -368,15 +374,15 @@ void LoweredGraph::manipulateLowerInfo(
if (lower_info->def_factors().size() == 0)
{
      // In case an operand is the graph's output but not an input or output of any operation
- lower_info->addDefPermuteFactor(operand::PermuteFactor{
+ lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{
controlflow_backend,
- Layout::NHWC // TODO Get frontend layout of this node from IR
+ ir::Layout::NHWC // TODO Get frontend layout of this node from IR
});
}
}
// Set LowerInfo for each operand from the operand::LowerInfo holder
- _graph.operands().iterate([&](const OperandIndex &index, Operand &) {
+ _graph.operands().iterate([&](const ir::OperandIndex &index, ir::Operand &) {
setLowerInfo(index, std::move(operands_lower_info[index]));
});
}
@@ -388,11 +394,11 @@ void LoweredGraph::dumpLowerInfo()
std::map<uint32_t, std::string> dumps;
- _graph.operands().iterate([&](const OperandIndex &index, Operand &object) {
+ _graph.operands().iterate([&](const ir::OperandIndex &index, ir::Operand &object) {
std::stringstream sstream;
if (!getLowerInfo(index)->def_factors().empty() || !getLowerInfo(index)->use_factors().empty())
{
- auto factors_to_string = [](const operand::PermuteFactorSet &factors) {
+ auto factors_to_string = [](const ir::operand::PermuteFactorSet &factors) {
std::string str;
for (auto factor : factors)
{
@@ -403,7 +409,7 @@ void LoweredGraph::dumpLowerInfo()
return "{ " + str + "}";
};
- auto operation_index_to_string = [](const OperationIndexSet &operations) {
+ auto operation_index_to_string = [](const ir::OperationIndexSet &operations) {
std::string str;
for (auto op : operations)
{
@@ -427,8 +433,8 @@ void LoweredGraph::dumpLowerInfo()
sstream << (shape.dim(i)) << " ";
}
sstream << "}" << std::endl;
- sstream << " - Def Operations : " << def_ops << std::endl;
- sstream << " - Use Operations : " << use_ops << std::endl;
+ sstream << " - Def ir::Operations : " << def_ops << std::endl;
+ sstream << " - Use ir::Operations : " << use_ops << std::endl;
sstream << " - Lower Info" << std::endl;
sstream << " - Def Backends : " << def_layouts << std::endl;
sstream << " - Use Backends : " << use_layouts << std::endl;
@@ -445,8 +451,9 @@ void LoweredGraph::dumpLowerInfo()
}
}
-bool LoweredGraph::mergeable(const OpSequenceIndex &op_seq_index, const OperationIndex &node_index,
- Layout layout, const compiler::BackendResolver &backend_resolver)
+bool LoweredGraph::mergeable(const ir::OpSequenceIndex &op_seq_index,
+ const ir::OperationIndex &node_index, ir::Layout layout,
+ const BackendResolver &backend_resolver)
{
// Are they mergeable?
// 1. the same backend id and layout?
@@ -470,10 +477,10 @@ bool LoweredGraph::mergeable(const OpSequenceIndex &op_seq_index, const Operatio
// Branched?
{
- std::unordered_set<OperationIndex> branched_set;
+ std::unordered_set<ir::OperationIndex> branched_set;
// Check for branching up
- for (const auto &input : op_seq.getInputs() | Remove::DUPLICATED | ir::Remove::UNDEFINED)
+ for (const auto &input : op_seq.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
const auto &input_obj = _graph.operands().at(input);
auto def = input_obj.getDef();
@@ -489,7 +496,7 @@ bool LoweredGraph::mergeable(const OpSequenceIndex &op_seq_index, const Operatio
branched_set.clear();
// Check for branching down
- for (const auto &output : node.getOutputs() | Remove::DUPLICATED)
+ for (const auto &output : node.getOutputs() | ir::Remove::DUPLICATED)
{
// TODO Fix this workaround for the case of model outputs that are used by another operation
// This is needed since the branching is decided by operation, but for model outputs,
@@ -516,7 +523,7 @@ bool LoweredGraph::mergeable(const OpSequenceIndex &op_seq_index, const Operatio
const auto &node_outputs = node.getOutputs();
    // op_seq's operations are in order, so we only need to check the first and the last
- std::vector<OperationIndex> op_seq_ops{op_seq.operations()[0]};
+ std::vector<ir::OperationIndex> op_seq_ops{op_seq.operations()[0]};
if (op_seq.operations().size() > 1)
op_seq_ops.emplace_back(op_seq.operations()[op_seq.operations().size() - 1]);
@@ -556,5 +563,5 @@ bool LoweredGraph::mergeable(const OpSequenceIndex &op_seq_index, const Operatio
return false;
}
-} // namespace ir
+} // namespace compiler
} // namespace onert
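
The iterateTopolOpSeqs changes above keep the existing scheme: a post-order DFS over OpSequences is collected and then reversed to obtain a topological order. A minimal, self-contained sketch of that idea on a generic DAG (the integer node indices and `successors` list are assumptions, not onert types):

// Sketch only: topological order of a DAG via reversed post-order DFS,
// mirroring iterateTopolOpSeqs above.
#include <algorithm>
#include <functional>
#include <vector>

std::vector<int> topologicalOrder(int num_nodes,
                                  const std::vector<std::vector<int>> &successors)
{
  std::vector<bool> visited(num_nodes, false);
  std::vector<int> post_order;
  std::function<void(int)> dfs = [&](int n) {
    visited[n] = true;
    for (int s : successors[n])
      if (!visited[s])
        dfs(s);
    post_order.push_back(n); // a node is emitted only after all of its successors
  };
  for (int n = 0; n < num_nodes; ++n)
    if (!visited[n])
      dfs(n);
  std::reverse(post_order.begin(), post_order.end()); // reversed post-order = topological order
  return post_order;
}
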
diff --git a/runtime/onert/core/src/compiler/ManualScheduler.cc b/runtime/onert/core/src/compiler/ManualScheduler.cc
index 1d591ae3c..ed49ee56f 100644
--- a/runtime/onert/core/src/compiler/ManualScheduler.cc
+++ b/runtime/onert/core/src/compiler/ManualScheduler.cc
@@ -40,7 +40,7 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap
const auto &manual_options = _options.manual_scheduler_options;
auto backend_resolver = std::make_unique<compiler::BackendResolver>();
- // This fallback will be used for unavailable backends
+  // This fallback will be used when `backend_for_all` is unavailable
auto fallback = [&]() -> const backend::Backend * {
for (auto backend_id : _options.backend_list)
{
@@ -50,7 +50,8 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap
}
return nullptr;
}();
- assert(fallback != nullptr); // There must be at least one fallback
+ if (fallback == nullptr)
+ throw std::runtime_error{"No loaded backends available."};
// 1. Backend for All operations
const backend::Backend *backend_all = resolveBackend(manual_options.backend_for_all, fallback);
@@ -110,7 +111,7 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap
const backend::Backend *ManualScheduler::resolveBackend(const std::string &id,
const backend::Backend *fallback)
{
- // Ensure if the backend is available in the backend
+  // Check that the backend is available in the current backend context
const backend::Backend *backend = BackendManager::get().get(id);
if (!backend || _backend_contexts.find(backend) == _backend_contexts.end())
{
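
The fallback change above replaces the old assert with an explicit error when none of the backends in the priority list is loaded. A minimal sketch of that selection logic, with a generic `lookup` callable standing in for BackendManager::get().get(id):

// Sketch only: pick the first loaded backend from a priority list, else fail loudly.
// `lookup` returns nullptr when a backend is not loaded.
#include <stdexcept>
#include <string>
#include <vector>

template <typename BackendT, typename LookupFn>
const BackendT *pickFallback(const std::vector<std::string> &backend_list, LookupFn lookup)
{
  for (const auto &id : backend_list)
    if (const BackendT *backend = lookup(id))
      return backend;
  throw std::runtime_error{"No loaded backends available."};
}
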
diff --git a/runtime/onert/core/src/compiler/OperationValidator.cc b/runtime/onert/core/src/compiler/OperationValidator.cc
index 44496318f..f7f659e3e 100644
--- a/runtime/onert/core/src/compiler/OperationValidator.cc
+++ b/runtime/onert/core/src/compiler/OperationValidator.cc
@@ -68,19 +68,6 @@ void OperationValidator::operator()()
[&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); });
}
-void OperationValidator::visit(const ir::operation::Abs &node) { checkUnaryOp(node); }
-
-void OperationValidator::visit(const ir::operation::AvgPool2D &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
- return;
-
- const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)};
-
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
-}
-
void OperationValidator::visit(const ir::operation::BatchMatMul &node)
{
const auto lhs_index(node.getInputs().at(ir::operation::BatchMatMul::Input::LHS));
@@ -125,17 +112,6 @@ void OperationValidator::visit(const ir::operation::BatchToSpaceND &node)
OP_REQUIRES(input_shape.C == output_shape.C);
}
-void OperationValidator::visit(const ir::operation::Cast &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(0)};
-
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
-}
-
void OperationValidator::visit(const ir::operation::Comparison &node)
{
const auto output_index{node.getOutputs().at(0)};
@@ -177,6 +153,17 @@ void OperationValidator::visit(const ir::operation::InstanceNorm &node)
OP_REQUIRES(_ctx.at(beta_index).shape().rank() == 1);
}
+void OperationValidator::visit(const ir::operation::Pool2D &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ if (_ctx.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};
+
+ OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
+}
+
void OperationValidator::visit(const ir::operation::Permute &node)
{
VERBOSE(Permute) << "Configure Permute operation" << std::endl;
@@ -298,8 +285,6 @@ void OperationValidator::visit(const ir::operation::RNN &node)
num_units == _ctx.at(hidden_state_out_index).shape().dim(1));
}
-void OperationValidator::visit(const ir::operation::Round &node) { checkUnaryOp(node); }
-
void OperationValidator::visit(const ir::operation::SpaceToBatchND &node)
{
const auto ofm_index{node.getOutputs().at(0)};
@@ -353,6 +338,51 @@ void OperationValidator::visit(const ir::operation::SpaceToDepth &node)
OP_REQUIRES(input_shape.C * block_size * block_size == output_shape.C);
}
+void OperationValidator::visit(const ir::operation::ElementwiseActivation &node)
+{
+ checkUnaryOp(node);
+}
+
+void OperationValidator::visit(const ir::operation::ElementwiseBinary &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
+
+ OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
+ OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(output_index).typeInfo().type());
+}
+
+void OperationValidator::visit(const ir::operation::ElementwiseUnary &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
+
+ OP_REQUIRES(node.getInputs().size() == 1);
+ OP_REQUIRES(node.getOutputs().size() == 1);
+
+ // Check if I/O types match
+ if (node.param().op_type == ir::operation::ElementwiseUnary::Type::DEQUANTIZE)
+ {
+ OP_REQUIRES(_ctx.at(input_index).typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM);
+ OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::FLOAT32);
+ }
+ else if (node.param().op_type == ir::operation::ElementwiseUnary::Type::QUANTIZE)
+ {
+ OP_REQUIRES(_ctx.at(input_index).typeInfo().type() == ir::DataType::FLOAT32);
+ OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM);
+ }
+ else if (node.param().op_type != ir::operation::ElementwiseUnary::Type::CAST)
+ {
+ OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
+ }
+
+ if (_ctx.at(output_index).info().isDynamic())
+ return;
+
+ OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
+}
+
void OperationValidator::visit(const ir::operation::EmbeddingLookup &node)
{
const auto output_index{node.getOutputs().at(0)};
@@ -389,8 +419,6 @@ void OperationValidator::visit(const ir::operation::EmbeddingLookup &node)
}
}
-void OperationValidator::visit(const ir::operation::Exp &node) { checkUnaryOp(node); }
-
void OperationValidator::visit(const ir::operation::ExpandDims &node)
{
const auto output_index{node.getOutputs().at(0)};
@@ -405,8 +433,6 @@ void OperationValidator::visit(const ir::operation::ExpandDims &node)
OP_REQUIRES(_ctx.at(axis_index).shape().rank() <= 1);
}
-void OperationValidator::visit(const ir::operation::Floor &node) { checkUnaryOp(node); }
-
void OperationValidator::visit(const ir::operation::HashtableLookup &node)
{
const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)};
@@ -495,21 +521,6 @@ void OperationValidator::visit(const ir::operation::Gather &node)
OP_REQUIRES(ofm_shape.rank() <= 4);
}
-void OperationValidator::visit(const ir::operation::Dequantize &node)
-{
- const auto output_index{node.getOutputs().at(0)};
-
- const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)};
-
- OP_REQUIRES(_ctx.at(input_index).typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM);
- OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::FLOAT32);
-
- if (_ctx.at(output_index).info().isDynamic())
- return;
- OP_REQUIRES(_ctx.at(input_index).shape().rank() <= 4);
- OP_REQUIRES(_ctx.at(input_index).shape() == _ctx.at(output_index).shape());
-}
-
void OperationValidator::visit(const ir::operation::DepthToSpace &node)
{
// param check
@@ -822,30 +833,6 @@ void OperationValidator::visit(const ir::operation::Pad &node)
OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank());
}
-void OperationValidator::visit(const ir::operation::Min &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- // This validator does not check shape. So checking isDynamic() is skipped.
-
- const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
-
- OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
- OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(output_index).typeInfo().type());
-}
-
-void OperationValidator::visit(const ir::operation::Max &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- // This validator does not check shape. So checking isDynamic() is skipped.
-
- const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
-
- OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
- OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(output_index).typeInfo().type());
-}
-
void OperationValidator::visit(const ir::operation::Select &node)
{
const auto output_index{node.getOutputs().at(0)};
@@ -899,12 +886,6 @@ void OperationValidator::visit(const ir::operation::Split &node)
OP_REQUIRES(_ctx.at(input_index).shape().dim(axis) % num_splits == 0);
}
-void OperationValidator::visit(const ir::operation::Cos &node) { checkUnaryOp(node); }
-
-void OperationValidator::visit(const ir::operation::Sin &node) { checkUnaryOp(node); }
-
-void OperationValidator::visit(const ir::operation::RSQRT &node) { checkUnaryOp(node); }
-
void OperationValidator::visit(const ir::operation::Shape &node)
{
const auto output_index{node.getOutputs().at(0)};
@@ -961,12 +942,6 @@ void OperationValidator::visit(const ir::operation::While &node)
  // TODO Add validation for the referenced subgraphs
}
-void OperationValidator::visit(const ir::operation::Neg &node) { checkUnaryOp(node); }
-
-void OperationValidator::visit(const ir::operation::Log &node) { checkUnaryOp(node); }
-
-void OperationValidator::visit(const ir::operation::LogicalNot &node) { checkUnaryOp(node); }
-
void OperationValidator::visit(const ir::operation::SquaredDifference &node)
{
const auto output_index{node.getOutputs().at(0)};
@@ -1027,16 +1002,6 @@ void OperationValidator::visit(const ir::operation::Tile &node)
OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank());
}
-void OperationValidator::visit(const ir::operation::LogicalOr &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(0)};
- const auto rhs_index{node.getInputs().at(1)};
-
- OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
- OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(output_index).typeInfo().type());
-}
-
void OperationValidator::visit(const ir::operation::Range &node)
{
const auto output_index{node.getOutputs().at(0)};
@@ -1084,24 +1049,5 @@ void OperationValidator::visit(const ir::operation::LogSoftmax &node)
OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
}
-void OperationValidator::visit(const ir::operation::Quantize &node)
-{
- VERBOSE(Quantize) << "Configure Quantize operation" << std::endl;
-
- OP_REQUIRES(node.getInputs().size() == 1);
- OP_REQUIRES(node.getOutputs().size() == 1);
-
- const auto input_index{node.getInputs().at(0)};
- const auto output_index{node.getOutputs().at(0)};
-
- OP_REQUIRES(_ctx.at(input_index).typeInfo().type() == ir::DataType::FLOAT32);
-
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM);
-
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
-}
} // namespace compiler
} // namespace onert
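
The new ElementwiseUnary validator above folds the former Cast/Quantize/Dequantize checks into a single set of I/O type rules. A small sketch of just that rule, with stand-in enums instead of the onert DataType and operation types:

// Sketch only: the I/O type rule of the ElementwiseUnary validator.
#include <stdexcept>

enum class DataType { FLOAT32, QUANT_UINT8_ASYMM, INT32 };
enum class UnaryType { ABS, CAST, DEQUANTIZE, QUANTIZE };

void checkUnaryTypes(UnaryType op, DataType in, DataType out)
{
  if (op == UnaryType::DEQUANTIZE)
  {
    if (in != DataType::QUANT_UINT8_ASYMM || out != DataType::FLOAT32)
      throw std::runtime_error{"DEQUANTIZE expects quant8 input and float32 output"};
  }
  else if (op == UnaryType::QUANTIZE)
  {
    if (in != DataType::FLOAT32 || out != DataType::QUANT_UINT8_ASYMM)
      throw std::runtime_error{"QUANTIZE expects float32 input and quant8 output"};
  }
  else if (op != UnaryType::CAST) // CAST may change the type freely
  {
    if (in != out)
      throw std::runtime_error{"input and output types must match"};
  }
}
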
diff --git a/runtime/onert/core/src/compiler/OperationValidator.h b/runtime/onert/core/src/compiler/OperationValidator.h
index b27e6863c..deb6357bb 100644
--- a/runtime/onert/core/src/compiler/OperationValidator.h
+++ b/runtime/onert/core/src/compiler/OperationValidator.h
@@ -44,58 +44,45 @@ public:
void operator()();
public:
- void visit(const ir::operation::Abs &node) override;
- void visit(const ir::operation::AvgPool2D &node) override;
void visit(const ir::operation::BatchMatMul &node) override;
void visit(const ir::operation::BatchToSpaceND &node) override;
- void visit(const ir::operation::Cast &node) override;
void visit(const ir::operation::Comparison &node) override;
void visit(const ir::operation::Softmax &node) override;
void visit(const ir::operation::InstanceNorm &node) override;
void visit(const ir::operation::Permute &node) override;
+ void visit(const ir::operation::Pool2D &node) override;
void visit(const ir::operation::Reduce &node) override;
void visit(const ir::operation::Transpose &node) override;
void visit(const ir::operation::RNN &node) override;
- void visit(const ir::operation::Round &node) override;
void visit(const ir::operation::SpaceToBatchND &node) override;
void visit(const ir::operation::SpaceToDepth &node) override;
+ void visit(const ir::operation::ElementwiseActivation &node) override;
+ void visit(const ir::operation::ElementwiseBinary &node) override;
+ void visit(const ir::operation::ElementwiseUnary &node) override;
void visit(const ir::operation::EmbeddingLookup &node) override;
- void visit(const ir::operation::Exp &node) override;
void visit(const ir::operation::ExpandDims &node) override;
- void visit(const ir::operation::Floor &node) override;
void visit(const ir::operation::HashtableLookup &node) override;
void visit(const ir::operation::TransposeConv &node) override;
void visit(const ir::operation::Gather &node) override;
- void visit(const ir::operation::Dequantize &node) override;
void visit(const ir::operation::DepthToSpace &node) override;
void visit(const ir::operation::Pack &node) override;
void visit(const ir::operation::LSTM &node) override;
void visit(const ir::operation::L2Normalization &node) override;
void visit(const ir::operation::Unpack &node) override;
void visit(const ir::operation::Pad &node) override;
- void visit(const ir::operation::Min &node) override;
- void visit(const ir::operation::Max &node) override;
void visit(const ir::operation::Select &node) override;
void visit(const ir::operation::StridedSlice &node) override;
void visit(const ir::operation::Split &node) override;
- void visit(const ir::operation::Cos &node) override;
- void visit(const ir::operation::Sin &node) override;
- void visit(const ir::operation::RSQRT &node) override;
void visit(const ir::operation::Shape &node) override;
void visit(const ir::operation::ResizeBilinear &node) override;
void visit(const ir::operation::Reverse &node) override;
void visit(const ir::operation::If &node) override;
void visit(const ir::operation::While &node) override;
- void visit(const ir::operation::Neg &node) override;
- void visit(const ir::operation::Log &node) override;
- void visit(const ir::operation::LogicalNot &node) override;
void visit(const ir::operation::SquaredDifference &node) override;
void visit(const ir::operation::Tile &node) override;
- void visit(const ir::operation::LogicalOr &node) override;
void visit(const ir::operation::Range &node) override;
void visit(const ir::operation::MatrixBandPart &node) override;
void visit(const ir::operation::LogSoftmax &node) override;
- void visit(const ir::operation::Quantize &node) override;
private:
void checkUnaryOp(const ir::Operation &node);
diff --git a/runtime/onert/core/src/compiler/StaticShapeInference.cc b/runtime/onert/core/src/compiler/StaticShapeInference.cc
index 76c1edcbc..4eba1ff49 100644
--- a/runtime/onert/core/src/compiler/StaticShapeInference.cc
+++ b/runtime/onert/core/src/compiler/StaticShapeInference.cc
@@ -25,6 +25,64 @@ namespace onert
namespace compiler
{
+bool StaticShapeInferer::infer(const ir::OpSequence &op_seq)
+{
+ bool has_dynamic_tensor = false;
+
+ for (const auto &operation_idx : op_seq.operations())
+ {
+ auto &op = _operations.at(operation_idx);
+ auto opcode = op.opcode();
+
+ _return_has_dynamic_tensor = false; // this is used as a return value inside operation's visit()
+
+    // If: need shape inference for the then/else subgraphs
+    // While: need shape inference for the condition/body subgraphs
+ if (opcode == ir::OpCode::If || opcode == ir::OpCode::While)
+ {
+ op.accept(*this);
+ }
+ else
+ {
+ _return_has_dynamic_tensor = checkDynamicInput(op);
+
+ if (_return_has_dynamic_tensor)
+ {
+ setDynamicOutput(op);
+ }
+ else
+ {
+ op.accept(*this);
+ }
+ }
+
+ has_dynamic_tensor = has_dynamic_tensor || _return_has_dynamic_tensor;
+ }
+
+ return has_dynamic_tensor;
+}
+
+bool StaticShapeInferer::checkDynamicInput(const ir::Operation &op)
+{
+ for (auto input_idx : op.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED)
+ {
+ if (_operands.at(input_idx).info().isDynamic())
+ {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void StaticShapeInferer::setDynamicOutput(const ir::Operation &op)
+{
+ for (auto output_idx : op.getOutputs())
+ {
+ _operands.at(output_idx).info().setDynamic();
+ }
+}
+
void StaticShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op,
const ir::OperandIndex lhs_idx,
const ir::OperandIndex rhs_idx)
@@ -35,13 +93,6 @@ void StaticShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op,
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- if (lhs.info().isDynamic() || rhs.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// re-sizing output shape
ir::Shape new_shape = shape_inference::inferEltwiseShape(lhs.info().shape(), rhs.info().shape());
output.info().shape(new_shape);
@@ -56,14 +107,6 @@ void StaticShapeInferer::handleSimpleUnaryOp(const ir::Operation &op,
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- // if input is dynamic, output also becomes dynamic
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// re-sizing output shape
ir::Shape new_shape = input.info().shape();
output.info().shape(new_shape);
@@ -99,17 +142,6 @@ void StaticShapeInferer::dump()
}
}
-void StaticShapeInferer::visit(const ir::operation::Abs &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Abs::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Add &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Add::Input::LHS),
- op.getInputs().at(ir::operation::Add::Input::RHS));
-}
-
void StaticShapeInferer::visit(const ir::operation::ArgMax &op)
{
const auto input_idx{op.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
@@ -118,15 +150,6 @@ void StaticShapeInferer::visit(const ir::operation::ArgMax &op)
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
-
- // if input is dynamic, output also becomes dynamic
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
const auto rank = input.info().shape().rank();
const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
@@ -145,35 +168,22 @@ void StaticShapeInferer::visit(const ir::operation::BatchMatMul &op)
const auto lhs = _operands.at(lhs_index);
const auto rhs = _operands.at(rhs_index);
auto &output = _operands.at(output_index);
-
- if (lhs.info().isDynamic() || rhs.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
auto new_shape = shape_inference::inferBatchMatMulShape(lhs.shape(), rhs.shape(), op.param());
output.info().shape(new_shape);
}
-void StaticShapeInferer::visit(const ir::operation::BroadcastTo &op)
+void StaticShapeInferer::visit(const ir::operation::BinaryArithmetic &op)
{
- const auto input_idx{op.getInputs().at(ir::operation::BroadcastTo::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS),
+ op.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS));
+}
+void StaticShapeInferer::visit(const ir::operation::BroadcastTo &op)
+{
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- // if input is dynamic, output also becomes dynamic.
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
const auto shape_idx{op.getInputs().at(ir::operation::BroadcastTo::Input::SHAPE)};
const auto &shape = _operands.at(shape_idx);
@@ -192,11 +202,6 @@ void StaticShapeInferer::visit(const ir::operation::BroadcastTo &op)
output.info().shape(new_shape);
}
-void StaticShapeInferer::visit(const ir::operation::Cast &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Cast::Input::INPUT));
-}
-
void StaticShapeInferer::visit(const ir::operation::Comparison &op)
{
handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Comparison::Input::INPUT0),
@@ -215,14 +220,6 @@ void StaticShapeInferer::visit(const ir::operation::Concat &op)
{
const auto input_idx{op.getInputs().at(i)};
const auto &input = _operands.at(input_idx);
-
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
input_shapes.emplace_back(input.shape());
}
@@ -241,33 +238,26 @@ void StaticShapeInferer::visit(const ir::operation::Conv2D &op)
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- if (input.info().isDynamic() || ker.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// re-sizing output shape
ir::Shape new_shape =
shape_inference::inferConv2DShape(input.info().shape(), ker.info().shape(), op.param());
output.info().shape(new_shape);
}
-void StaticShapeInferer::visit(const ir::operation::Cos &op)
+void StaticShapeInferer::visit(const ir::operation::ElementwiseActivation &op)
{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Cos::Input::INPUT));
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT));
}
-void StaticShapeInferer::visit(const ir::operation::Div &op)
+void StaticShapeInferer::visit(const ir::operation::ElementwiseBinary &op)
{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Div::Input::LHS),
- op.getInputs().at(ir::operation::Div::Input::RHS));
+ handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS),
+ op.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS));
}
-void StaticShapeInferer::visit(const ir::operation::Exp &op)
+void StaticShapeInferer::visit(const ir::operation::ElementwiseUnary &op)
{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Exp::Input::INPUT));
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT));
}
void StaticShapeInferer::visit(const ir::operation::ExpandDims &op)
@@ -279,13 +269,6 @@ void StaticShapeInferer::visit(const ir::operation::ExpandDims &op)
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
if (!axis.isConstant())
{
output.info().setDynamic();
@@ -310,13 +293,6 @@ void StaticShapeInferer::visit(const ir::operation::Fill &op)
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
if (!input.isConstant())
{
output.info().setDynamic();
@@ -345,15 +321,6 @@ void StaticShapeInferer::visit(const ir::operation::FullyConnected &op)
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
-
- // if input or ker is dynamic, output also becomes dynamic
- if (input.info().isDynamic() || ker.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// re-sizing output shape
ir::Shape new_shape =
shape_inference::inferFullyConnectedShape(input.info().shape(), ker.info().shape());
@@ -376,15 +343,6 @@ void StaticShapeInferer::visit(const ir::operation::Gather &op)
const auto indices_idx{op.getInputs().at(ir::operation::Gather::Input::INDICES)};
const auto &indices = _operands.at(indices_idx);
-
- // if input is dynamic, output also becomes dynamic
- if (input.info().isDynamic() || indices.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
const auto rank = input.info().shape().rank();
const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
@@ -476,27 +434,6 @@ void StaticShapeInferer::visit(const ir::operation::If &op)
}
}
-void StaticShapeInferer::visit(const ir::operation::Log &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Log::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::LogicalNot &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::LogicalNot::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::LogicalOr &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::LogicalOr::Input::INPUT0),
- op.getInputs().at(ir::operation::LogicalOr::Input::INPUT1));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Logistic &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Logistic::Input::INPUT));
-}
-
void StaticShapeInferer::visit(const ir::operation::L2Normalization &op)
{
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::L2Normalization::Input::INPUT));
@@ -507,29 +444,6 @@ void StaticShapeInferer::visit(const ir::operation::MatrixBandPart &op)
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::MatrixBandPart::Input::INPUT));
}
-void StaticShapeInferer::visit(const ir::operation::Max &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Max::Input::LHS),
- op.getInputs().at(ir::operation::Max::Input::RHS));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Min &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Min::Input::LHS),
- op.getInputs().at(ir::operation::Min::Input::RHS));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Mul &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Mul::Input::LHS),
- op.getInputs().at(ir::operation::Mul::Input::RHS));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Neg &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Neg::Input::INPUT));
-}
-
void StaticShapeInferer::visit(const ir::operation::OneHot &op)
{
const auto indice_idx{op.getInputs().at(ir::operation::OneHot::Input::INDICES)};
@@ -542,7 +456,7 @@ void StaticShapeInferer::visit(const ir::operation::OneHot &op)
auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- if (indice.info().isDynamic() || depth.info().isDynamic() || !depth.isConstant())
+ if (!depth.isConstant())
{
output.info().setDynamic();
_return_has_dynamic_tensor = true;
@@ -558,18 +472,6 @@ void StaticShapeInferer::visit(const ir::operation::OneHot &op)
void StaticShapeInferer::visit(const ir::operation::Pack &op)
{
- bool is_any_of_inputs_dynamic = [&]() -> bool {
- for (uint32_t i = 0; i < op.getInputs().size(); ++i)
- {
- const auto &input = _operands.at(op.getInputs().at(i));
- if (input.info().isDynamic())
- {
- return true;
- }
- }
- return false;
- }();
-
const auto input_idx{op.getInputs().at(0)};
const auto &input = _operands.at(input_idx);
@@ -577,14 +479,6 @@ void StaticShapeInferer::visit(const ir::operation::Pack &op)
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- // if input is dynamic, output also becomes dynamic
- if (is_any_of_inputs_dynamic)
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
const auto rank = input.shape().rank() + 1;
const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
const auto num = op.param().num;
@@ -608,14 +502,6 @@ void StaticShapeInferer::visit(const ir::operation::Pad &op)
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- // if input is dynamic or pad is dynamic, output also becomes dynamic
- if (input.info().isDynamic() || pad.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// if pad is not constant, output also becomes dynamic
if (!pad.isConstant())
{
@@ -638,13 +524,6 @@ void StaticShapeInferer::visit(const ir::operation::Permute &op)
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// re-sizing output shape
  // Permute is a special operation whose input/output layouts may differ on the backend
  // However, that is not applied here, so input/output keep the frontend layout. Because
@@ -672,13 +551,6 @@ void StaticShapeInferer::visit(const ir::operation::Range &op)
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- // if any input is dynamic, output also becomes dynamic
- if (start_op.info().isDynamic() || limit_op.info().isDynamic() || delta_op.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
ir::Shape new_shape;
if (start_op.isConstant() && limit_op.isConstant() && delta_op.isConstant())
@@ -716,14 +588,6 @@ void StaticShapeInferer::visit(const ir::operation::Reduce &op)
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- // if input is dynamic, output also becomes dynamic
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
std::vector<int32_t> axes_vec;
for (size_t i = 0; i < axes.shape().num_elements(); ++i)
{
@@ -761,14 +625,6 @@ void StaticShapeInferer::visit(const ir::operation::Reshape &op)
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- // if input is dynamic, output also becomes dynamic
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// New shape is given by second input tensor
if (op.getInputs().size() == 2)
{
@@ -827,14 +683,6 @@ void StaticShapeInferer::visit(const ir::operation::ResizeBilinear &op)
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- // if input is dynamic, output also becomes dynamic
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// Shape inferencing logic based on Params
ir::Shape new_shape = shape_inference::inferResizeBilinearShape(
input.shape(), op.param().height_out, op.param().width_out);
@@ -852,16 +700,6 @@ void StaticShapeInferer::visit(const ir::operation::Reverse &op)
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Reverse::Input::INPUT));
}
-void StaticShapeInferer::visit(const ir::operation::Round &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Round::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::RSQRT &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::RSQRT::Input::INPUT));
-}
-
void StaticShapeInferer::visit(const ir::operation::Select &op)
{
const auto input_cond_idx{op.getInputs().at(ir::operation::Select::Input::CONDITION)};
@@ -876,14 +714,6 @@ void StaticShapeInferer::visit(const ir::operation::Select &op)
auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- if (input_cond.info().isDynamic() || input_true.info().isDynamic() ||
- input_false.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
  // Select output shape
ir::Shape new_shape = shape_inference::inferSelectShape(
input_cond.info().shape(), input_true.info().shape(), input_false.info().shape());
@@ -899,14 +729,6 @@ void StaticShapeInferer::visit(const ir::operation::Shape &op)
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- // if input is dynamic, output also becomes dynamic
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// re-sizing output shape
ir::Shape output_shape;
output_shape.append(input.info().shape().rank());
@@ -914,11 +736,6 @@ void StaticShapeInferer::visit(const ir::operation::Shape &op)
output.info().shape(output_shape);
}
-void StaticShapeInferer::visit(const ir::operation::Sin &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Sin::Input::INPUT));
-}
-
void StaticShapeInferer::visit(const ir::operation::Slice &op)
{
const auto input_index{op.getInputs().at(ir::operation::Slice::Input::INPUT)};
@@ -930,13 +747,6 @@ void StaticShapeInferer::visit(const ir::operation::Slice &op)
const auto output_index = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_index);
- if (input.info().isDynamic() || begins.info().isDynamic() || sizes.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// Whether input is constant or not does not affect whether output is dynamic or not
if (!(begins.isConstant() && sizes.isConstant()))
{
@@ -970,13 +780,6 @@ void StaticShapeInferer::visit(const ir::operation::SpaceToBatchND &op)
const auto &block_shape = _operands.at(block_shape_idx);
const auto &padding = _operands.at(padding_idx);
- if (input.info().isDynamic() || block_shape.info().isDynamic() || padding.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
// Whether input is constant or not does not affect whether output is dynamic or not
if (!(block_shape.isConstant() && padding.isConstant()))
{
@@ -1006,18 +809,6 @@ void StaticShapeInferer::visit(const ir::operation::Split &op)
const auto axis = op.param().axis;
const auto num_splits = op.param().num_splits;
- if (input.info().isDynamic())
- {
- for (int out_tensor_idx = 0; out_tensor_idx < num_splits; out_tensor_idx++)
- {
- const auto output_idx = op.getOutputs().at(out_tensor_idx);
- ir::Operand &output = _operands.at(output_idx);
- output.info().setDynamic();
- }
- _return_has_dynamic_tensor = true;
- return;
- }
-
const auto rank = input.info().shape().rank();
auto axis_resolved = axis < 0 ? axis + rank : axis;
@@ -1072,14 +863,6 @@ void StaticShapeInferer::visit(const ir::operation::StridedSlice &op)
const auto output_index = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_index);
- if (input.info().isDynamic() || starts.info().isDynamic() || ends.info().isDynamic() ||
- strides.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
if (!(starts.isConstant() && ends.isConstant() && strides.isConstant()))
{
output.info().setDynamic();
@@ -1104,17 +887,6 @@ void StaticShapeInferer::visit(const ir::operation::StridedSlice &op)
output.info().shape(new_shape);
}
-void StaticShapeInferer::visit(const ir::operation::Sub &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Sub::Input::LHS),
- op.getInputs().at(ir::operation::Sub::Input::RHS));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Tanh &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Tanh::Input::INPUT));
-}
-
void StaticShapeInferer::visit(const ir::operation::Tile &op)
{
const auto input_idx{op.getInputs().at(ir::operation::Tile::Input::INPUT)};
@@ -1126,13 +898,6 @@ void StaticShapeInferer::visit(const ir::operation::Tile &op)
const auto output_idx = op.getOutputs().at(0);
ir::Operand &output = _operands.at(output_idx);
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
if (!multiplier.isConstant())
{
output.info().setDynamic();
@@ -1158,13 +923,7 @@ void StaticShapeInferer::visit(const ir::operation::Transpose &op)
ir::Operand &output = _operands.at(output_idx);
const auto perm{op.param().perm};
// const auto rank{op.param().rank};
- // if input is dynamic, output also becomes dynamic
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
+
// set output shape, based on input and params
ir::Shape new_shape = shape_inference::inferTransposeShape(input.info().shape(), perm);
output.info().shape(new_shape);
@@ -1175,20 +934,6 @@ void StaticShapeInferer::visit(const ir::operation::Unpack &op)
const auto input_idx{op.getInputs().at(0)};
const auto &input = _operands.at(input_idx);
const auto num = op.param().num;
-
- // if input is dynamic, output also becomes dynamic
- if (input.info().isDynamic())
- {
- for (int out_tensor_idx = 0; out_tensor_idx < num; out_tensor_idx++)
- {
- const auto output_idx = op.getOutputs().at(out_tensor_idx);
- ir::Operand &output = _operands.at(output_idx);
- output.info().setDynamic();
- }
- _return_has_dynamic_tensor = true;
- return;
- }
-
const auto rank = input.shape().rank();
const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
@@ -1346,11 +1091,6 @@ void StaticShapeInferer::visit(const ir::operation::While &op)
}
}
-void StaticShapeInferer::visit(const ir::operation::ZerosLike &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ZerosLike::Input::INPUT));
-}
-
} // namespace compiler
} // namespace onert
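
With the per-operation dynamic checks removed, StaticShapeInferer::infer above now decides once per operation whether to visit subgraphs, mark outputs dynamic, or run static inference. A minimal sketch of that decision, with a hypothetical OpView in place of the IR operation:

// Sketch only: the per-operation decision made in StaticShapeInferer::infer.
#include <vector>

struct OpView
{
  bool is_control_flow;               // If / While: their subgraphs are inferred by the visitor
  std::vector<bool> input_is_dynamic; // one flag per defined input operand
};

enum class Action { VisitSubgraphs, MarkOutputsDynamic, InferStatically };

Action decide(const OpView &op)
{
  if (op.is_control_flow)
    return Action::VisitSubgraphs;
  for (bool dynamic : op.input_is_dynamic)
    if (dynamic)
      return Action::MarkOutputsDynamic; // any dynamic input makes every output dynamic
  return Action::InferStatically;        // all inputs static: run the shape-inference visitor
}
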
diff --git a/runtime/onert/core/src/compiler/TensorBuilders.h b/runtime/onert/core/src/compiler/TensorBuilders.h
index c0a1ebc04..3b0360b4b 100644
--- a/runtime/onert/core/src/compiler/TensorBuilders.h
+++ b/runtime/onert/core/src/compiler/TensorBuilders.h
@@ -67,17 +67,6 @@ public:
return _cf_tensor_builder;
}
- std::shared_ptr<backend::ITensor> getITensor(ir::OperandIndex ind)
- {
- for (auto &tensor_builder : _tensor_builders)
- {
- auto tensor = tensor_builder->tensorAt(ind);
- if (tensor)
- return tensor;
- }
- return nullptr;
- }
-
private:
std::unordered_set<std::shared_ptr<backend::ITensorBuilder>> _tensor_builders;
std::shared_ptr<backend::controlflow::TensorBuilder> _cf_tensor_builder;
diff --git a/runtime/onert/core/src/compiler/TensorRegistries.h b/runtime/onert/core/src/compiler/TensorRegistries.h
new file mode 100644
index 000000000..8be87b081
--- /dev/null
+++ b/runtime/onert/core/src/compiler/TensorRegistries.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_TENSOR_REGISTRIES_H__
+#define __ONERT_COMPILER_TENSOR_REGISTRIES_H__
+
+#include <unordered_set>
+#include <memory>
+#include "backend/BackendContext.h"
+#include "backend/Backend.h"
+#include "backend/controlflow/Config.h"
+#include "backend/controlflow/TensorBuilder.h"
+#include "backend/controlflow/TensorRegistry.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+class TensorRegistries
+{
+public:
+ TensorRegistries() = default;
+
+ TensorRegistries(const onert::backend::BackendContexts &backend_contexts,
+ bool include_controlflow)
+ {
+ for (const auto &e : backend_contexts)
+ {
+ auto tensor_reg = e.second->tensor_registry;
+ if (e.first->config()->id() == backend::controlflow::Config::ID)
+ {
+ _cf_tensor_reg =
+ std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(tensor_reg);
+ if (include_controlflow)
+ _tensor_regs.insert(tensor_reg);
+ }
+ else
+ {
+ _tensor_regs.insert(tensor_reg);
+ }
+ }
+ }
+
+ std::unordered_set<std::shared_ptr<onert::backend::ITensorRegistry>>::const_iterator begin() const
+ {
+ return _tensor_regs.cbegin();
+ }
+ std::unordered_set<std::shared_ptr<onert::backend::ITensorRegistry>>::const_iterator end() const
+ {
+ return _tensor_regs.cend();
+ }
+
+ std::shared_ptr<backend::controlflow::TensorRegistry> getControlflowTensorRegistry() const
+ {
+ return _cf_tensor_reg;
+ }
+
+ std::shared_ptr<backend::ITensor> getITensor(ir::OperandIndex ind) const
+ {
+ for (auto &tensor_reg : _tensor_regs)
+ {
+ auto tensor = tensor_reg->getITensor(ind);
+ if (tensor)
+ return tensor;
+ }
+ return nullptr;
+ }
+
+private:
+ std::unordered_set<std::shared_ptr<backend::ITensorRegistry>> _tensor_regs;
+ std::shared_ptr<backend::controlflow::TensorRegistry> _cf_tensor_reg;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_TENSOR_REGISTRIES_H__
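
TensorRegistries::getITensor above takes over from the removed TensorBuilders::getITensor: it asks every registered tensor registry and returns the first non-null result. A generic sketch of that lookup pattern (Registry and Tensor are stand-ins, not onert types):

// Sketch only: return the first registry that knows the operand, as getITensor does.
#include <memory>
#include <unordered_set>

struct Tensor
{
};

struct Registry
{
  virtual ~Registry() = default;
  virtual std::shared_ptr<Tensor> find(int operand_index) const = 0;
};

std::shared_ptr<Tensor> findAcross(const std::unordered_set<std::shared_ptr<Registry>> &regs,
                                   int operand_index)
{
  for (const auto &reg : regs)
  {
    if (auto tensor = reg->find(operand_index))
      return tensor; // the first registry that knows this operand wins
  }
  return nullptr;
}
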
diff --git a/runtime/onert/core/src/ir/pass/ConstantInsertionPass.cc b/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.cc
index 1742a0dd5..647669e46 100644
--- a/runtime/onert/core/src/ir/pass/ConstantInsertionPass.cc
+++ b/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.cc
@@ -22,20 +22,20 @@
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
-void ConstantInsertionPass::callback(const OperationIndex &node_index, Operation &node)
+void ConstantInsertionPass::callback(const ir::OperationIndex &node_index, ir::Operation &node)
{
const auto &op_sequence_index = _lowered_graph.op_seqs().getOperation(node_index);
const auto op_seq_lower_info = _lowered_graph.getLowerInfo(op_sequence_index);
const auto backend = op_seq_lower_info->backend();
const auto layout = op_seq_lower_info->layout();
- const auto factor = operand::PermuteFactor{backend, layout};
+ const auto factor = ir::operand::PermuteFactor{backend, layout};
- for (const auto input : node.getInputs() | Remove::DUPLICATED | ir::Remove::UNDEFINED)
+ for (const auto input : node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
auto &object = _graph.operands().at(input);
@@ -47,7 +47,7 @@ void ConstantInsertionPass::callback(const OperationIndex &node_index, Operation
auto new_object = object;
new_object.unsetDef();
// TODO Remove const_case
- const_cast<OperationIndexSet &>(new_object.getUses()).clear();
+ const_cast<ir::OperationIndexSet &>(new_object.getUses()).clear();
const auto new_index = _graph.operands().emplace(new_object);
_replace_operands_map[key] = new_index;
}
@@ -89,5 +89,5 @@ void ConstantInsertionPass::callback(const OperationIndex &node_index, Operation
}
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
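
The callback above clones a constant operand at most once per (backend, layout) key and reuses the clone through _replace_operands_map. A minimal sketch of that memoised-clone pattern with simplified types (the key is a plain string here, not an OperandIndex/PermuteFactor pair):

// Sketch only: clone a constant at most once per key and reuse it afterwards.
#include <string>
#include <unordered_map>
#include <vector>

struct Constant
{
  std::vector<float> data;
};

int cloneConstantFor(const std::string &key, int original_index,
                     std::unordered_map<std::string, int> &clones,
                     std::vector<Constant> &operands)
{
  auto it = clones.find(key);
  if (it != clones.end())
    return it->second;                          // already cloned for this backend/layout
  operands.push_back(operands[original_index]); // make a fresh copy of the constant
  const int new_index = static_cast<int>(operands.size()) - 1;
  clones[key] = new_index;
  return new_index;
}
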
diff --git a/runtime/onert/core/src/ir/pass/ConstantInsertionPass.h b/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.h
index 3ea4dc397..052883c92 100644
--- a/runtime/onert/core/src/ir/pass/ConstantInsertionPass.h
+++ b/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_GRAPH_PASS_CONSTANT_INSERTION_PASS_H__
-#define __ONERT_GRAPH_PASS_CONSTANT_INSERTION_PASS_H__
+#ifndef __ONERT_COMPILER_PASS_CONSTANT_INSERTION_PASS_H__
+#define __ONERT_COMPILER_PASS_CONSTANT_INSERTION_PASS_H__
#include <ir/operand/PermuteFactor.h>
#include <ir/Index.h>
@@ -25,7 +25,7 @@
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
@@ -39,13 +39,13 @@ public:
std::string id() final { return "ConstantInsertionPass"; }
public:
- void callback(const OperationIndex &index, Operation &node) final;
+ void callback(const ir::OperationIndex &index, ir::Operation &node) final;
private:
struct ReplaceKey
{
- OperandIndex index;
- operand::PermuteFactor factor;
+ ir::OperandIndex index;
+ ir::operand::PermuteFactor factor;
bool operator==(const ReplaceKey &other) const
{
@@ -61,15 +61,16 @@ private:
std::size_t operator()(const ReplaceKey &key) const noexcept
{
using std::hash;
- return hash<OperandIndex>()(key.index) ^ (hash<operand::PermuteFactor>()(key.factor) << 1);
+ return hash<ir::OperandIndex>()(key.index) ^
+ (hash<ir::operand::PermuteFactor>()(key.factor) << 1);
}
};
- std::unordered_map<ReplaceKey, OperandIndex, KeyHasher> _replace_operands_map;
+ std::unordered_map<ReplaceKey, ir::OperandIndex, KeyHasher> _replace_operands_map;
};
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
-#endif // __ONERT_GRAPH_PASS_CONSTANT_INSERTION_PASS_H__
+#endif // __ONERT_COMPILER_PASS_CONSTANT_INSERTION_PASS_H__
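
ReplaceKey above combines two hashes with xor and a shift so it can serve as an unordered_map key. The same pattern on simplified types (int and std::string instead of OperandIndex and PermuteFactor):

// Sketch only: composite-key hashing as in ReplaceKey/KeyHasher.
#include <cstddef>
#include <functional>
#include <string>
#include <unordered_map>

struct Key
{
  int index;          // stands in for ir::OperandIndex
  std::string factor; // stands in for ir::operand::PermuteFactor
  bool operator==(const Key &other) const
  {
    return index == other.index && factor == other.factor;
  }
};

struct KeyHasher
{
  std::size_t operator()(const Key &key) const noexcept
  {
    // Same combination scheme as the pass: xor with a shifted second hash.
    return std::hash<int>()(key.index) ^ (std::hash<std::string>()(key.factor) << 1);
  }
};

// Usage: std::unordered_map<Key, int, KeyHasher> replace_map;
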
diff --git a/runtime/onert/core/src/ir/pass/ConstantLoweringPass.cc b/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.cc
index 04f4e59c0..1c1dbe0ee 100644
--- a/runtime/onert/core/src/ir/pass/ConstantLoweringPass.cc
+++ b/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.cc
@@ -23,28 +23,28 @@
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
-void ConstantLoweringPass::callback(const OperationIndex &node_index, Operation &node)
+void ConstantLoweringPass::callback(const ir::OperationIndex &node_index, ir::Operation &node)
{
const auto &op_sequence_index = _lowered_graph.op_seqs().getOperation(node_index);
const auto op_seq_lower_info = _lowered_graph.getLowerInfo(op_sequence_index);
const auto backend = op_seq_lower_info->backend();
const auto layout = op_seq_lower_info->layout();
- const auto factor = operand::PermuteFactor{backend, layout};
+ const auto factor = ir::operand::PermuteFactor{backend, layout};
  // Currently this runtime does not support making an operation's output a constant
- for (const auto input : node.getInputs() | Remove::DUPLICATED | ir::Remove::UNDEFINED)
+ for (const auto input : node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
auto &object = _graph.operands().at(input);
if (object.isConstant())
{
      // All constant operands are already assigned to each backend by ConstantInsertionPass, so a
      // constant has the same PermuteFactor for both `def` and `use`
- _lowered_graph.setLowerInfo(input, std::make_unique<operand::LowerInfo>());
+ _lowered_graph.setLowerInfo(input, std::make_unique<ir::operand::LowerInfo>());
_lowered_graph.getLowerInfo(input)->addDefPermuteFactor(factor);
_lowered_graph.getLowerInfo(input)->addUsePermuteFactor(factor);
}
@@ -52,5 +52,5 @@ void ConstantLoweringPass::callback(const OperationIndex &node_index, Operation
}
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
diff --git a/runtime/onert/core/src/ir/pass/ConstantLoweringPass.h b/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.h
index 5c9f4352b..e17d776d1 100644
--- a/runtime/onert/core/src/ir/pass/ConstantLoweringPass.h
+++ b/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.h
@@ -14,15 +14,15 @@
* limitations under the License.
*/
-#ifndef __ONERT_GRAPH_PASS_CONSTANT_LOWERING_PASS_H__
-#define __ONERT_GRAPH_PASS_CONSTANT_LOWERING_PASS_H__
+#ifndef __ONERT_COMPILER_PASS_CONSTANT_LOWERING_PASS_H__
+#define __ONERT_COMPILER_PASS_CONSTANT_LOWERING_PASS_H__
#include <ir/Index.h>
#include "LoweredOperationPass.h"
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
@@ -36,11 +36,11 @@ public:
std::string id() final { return "ConstantLoweringPass"; }
public:
- void callback(const OperationIndex &index, Operation &node) final;
+ void callback(const ir::OperationIndex &index, ir::Operation &node) final;
};
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
-#endif // __ONERT_GRAPH_PASS_CONSTANT_LOWERING_PASS_H__
+#endif // __ONERT_COMPILER_PASS_CONSTANT_LOWERING_PASS_H__
diff --git a/runtime/onert/core/src/ir/pass/LoweredOperandPass.h b/runtime/onert/core/src/compiler/pass/LoweredOperandPass.h
index eefb8ddfb..0c5f7d745 100644
--- a/runtime/onert/core/src/ir/pass/LoweredOperandPass.h
+++ b/runtime/onert/core/src/compiler/pass/LoweredOperandPass.h
@@ -18,11 +18,11 @@
#define __ONERT_IR_PASS_LOWERED_OPERAND_PASS_H__
#include "OperandPass.h"
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
@@ -30,7 +30,7 @@ namespace pass
class LoweredOperandPass : public OperandPass
{
public:
- LoweredOperandPass(ir::LoweredGraph &lowered_graph)
+ LoweredOperandPass(compiler::LoweredGraph &lowered_graph)
: OperandPass{lowered_graph.graph()}, _lowered_graph{lowered_graph}
{
// DO NOTHING
@@ -39,14 +39,14 @@ public:
virtual ~LoweredOperandPass() = default;
std::string id() override = 0;
- void callback(const OperandIndex &i, Operand &o) override = 0;
+ void callback(const ir::OperandIndex &i, ir::Operand &o) override = 0;
protected:
- ir::LoweredGraph &_lowered_graph;
+ compiler::LoweredGraph &_lowered_graph;
};
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
#endif // __ONERT_IR_PASS_LOWERED_OPERAND_PASS_H__
diff --git a/runtime/onert/core/src/ir/pass/LoweredOperationPass.h b/runtime/onert/core/src/compiler/pass/LoweredOperationPass.h
index 0138712d7..5c8569be2 100644
--- a/runtime/onert/core/src/ir/pass/LoweredOperationPass.h
+++ b/runtime/onert/core/src/compiler/pass/LoweredOperationPass.h
@@ -18,11 +18,11 @@
#define __ONERT_IR_PASS_LOWERED_OPERATION_PASS_H__
#include "OperationPass.h"
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
@@ -30,7 +30,7 @@ namespace pass
class LoweredOperationPass : public OperationPass
{
public:
- LoweredOperationPass(ir::LoweredGraph &lowered_graph)
+ LoweredOperationPass(LoweredGraph &lowered_graph)
: OperationPass{lowered_graph.graph()}, _lowered_graph{lowered_graph}
{
// DO NOTHING
@@ -39,14 +39,14 @@ public:
virtual ~LoweredOperationPass() = default;
std::string id() override = 0;
- void callback(const OperationIndex &i, Operation &o) override = 0;
+ void callback(const ir::OperationIndex &i, ir::Operation &o) override = 0;
protected:
- ir::LoweredGraph &_lowered_graph;
+ LoweredGraph &_lowered_graph;
};
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
#endif // __ONERT_IR_PASS_LOWERED_OPERATION_PASS_H__
diff --git a/runtime/onert/core/src/ir/pass/OperandPass.cc b/runtime/onert/core/src/compiler/pass/OperandPass.cc
index 693a0f493..50c001c30 100644
--- a/runtime/onert/core/src/ir/pass/OperandPass.cc
+++ b/runtime/onert/core/src/compiler/pass/OperandPass.cc
@@ -20,7 +20,7 @@
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
@@ -28,9 +28,9 @@ namespace pass
void OperandPass::run()
{
_graph.operands().iterate(
- [&](const OperandIndex &index, Operand &object) { callback(index, object); });
+ [&](const ir::OperandIndex &index, ir::Operand &object) { callback(index, object); });
}
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
diff --git a/runtime/onert/core/src/ir/pass/OperandPass.h b/runtime/onert/core/src/compiler/pass/OperandPass.h
index 393060741..b094879c5 100644
--- a/runtime/onert/core/src/ir/pass/OperandPass.h
+++ b/runtime/onert/core/src/compiler/pass/OperandPass.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_GRAPH_PASS_OPERAND_PASS_H__
-#define __ONERT_GRAPH_PASS_OPERAND_PASS_H__
+#ifndef __ONERT_COMPILER_PASS_OPERAND_PASS_H__
+#define __ONERT_COMPILER_PASS_OPERAND_PASS_H__
#include "Pass.h"
#include "ir/Index.h"
@@ -30,7 +30,7 @@ class Operand;
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
@@ -44,11 +44,11 @@ public:
public:
std::string id() override = 0;
void run() override final;
- virtual void callback(const OperandIndex &i, Operand &o) = 0;
+ virtual void callback(const ir::OperandIndex &i, ir::Operand &o) = 0;
};
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
-#endif // __ONERT_GRAPH_PASS_OPERAND_PASS_H__
+#endif // __ONERT_COMPILER_PASS_OPERAND_PASS_H__
diff --git a/runtime/onert/core/src/ir/pass/OperationPass.cc b/runtime/onert/core/src/compiler/pass/OperationPass.cc
index 84b1da3ee..d7a55cb22 100644
--- a/runtime/onert/core/src/ir/pass/OperationPass.cc
+++ b/runtime/onert/core/src/compiler/pass/OperationPass.cc
@@ -22,7 +22,7 @@
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
@@ -30,9 +30,9 @@ namespace pass
void OperationPass::run()
{
_graph.operations().iterate(
- [&](const OperationIndex &index, Operation &node) { callback(index, node); });
+ [&](const ir::OperationIndex &index, ir::Operation &node) { callback(index, node); });
}
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
diff --git a/runtime/onert/core/src/ir/pass/OperationPass.h b/runtime/onert/core/src/compiler/pass/OperationPass.h
index 1733f87ed..ac4d818a2 100644
--- a/runtime/onert/core/src/ir/pass/OperationPass.h
+++ b/runtime/onert/core/src/compiler/pass/OperationPass.h
@@ -19,8 +19,8 @@
* @brief This file contains OperationPass class
*/
-#ifndef __ONERT_GRAPH_PASS_OPERATION_PASS_H__
-#define __ONERT_GRAPH_PASS_OPERATION_PASS_H__
+#ifndef __ONERT_COMPILER_PASS_OPERATION_PASS_H__
+#define __ONERT_COMPILER_PASS_OPERATION_PASS_H__
#include "Pass.h"
#include "ir/Index.h"
@@ -35,7 +35,7 @@ class Operation;
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
@@ -62,7 +62,7 @@ public:
* @param index is the index of a node in graph
* @param node is the node in graph
*/
- virtual void callback(const OperationIndex &index, Operation &node) = 0;
+ virtual void callback(const ir::OperationIndex &index, ir::Operation &node) = 0;
/**
* @brief Run the pass
@@ -71,7 +71,7 @@ public:
};
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
-#endif // __ONERT_GRAPH_PASS_OPERATION_PASS_H__
+#endif // __ONERT_COMPILER_PASS_OPERATION_PASS_H__
diff --git a/runtime/onert/core/src/ir/pass/Pass.h b/runtime/onert/core/src/compiler/pass/Pass.h
index 1c6628f6f..3f356c337 100644
--- a/runtime/onert/core/src/ir/pass/Pass.h
+++ b/runtime/onert/core/src/compiler/pass/Pass.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_GRAPH_PASS_PASS_H__
-#define __ONERT_GRAPH_PASS_PASS_H__
+#ifndef __ONERT_COMPILER_PASS_PASS_H__
+#define __ONERT_COMPILER_PASS_PASS_H__
#include <string>
@@ -24,12 +24,12 @@ namespace onert
namespace ir
{
class Graph;
-} // namespace ir
+} // namespace compiler
} // namespace onert
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
@@ -37,7 +37,7 @@ namespace pass
class Pass
{
public:
- Pass(Graph &graph) : _graph{graph} {}
+ Pass(ir::Graph &graph) : _graph{graph} {}
virtual ~Pass() = default;
public:
@@ -45,11 +45,11 @@ public:
virtual void run() = 0;
protected:
- Graph &_graph;
+ ir::Graph &_graph;
};
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
-#endif // __ONERT_GRAPH_PASS_PASS_H__
+#endif // __ONERT_COMPILER_PASS_PASS_H__
diff --git a/runtime/onert/core/src/ir/pass/PermutationEliminationPass.cc b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc
index 2deccd40b..f01697034 100644
--- a/runtime/onert/core/src/ir/pass/PermutationEliminationPass.cc
+++ b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc
@@ -21,35 +21,33 @@
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
-void PermutationEliminationPass::callback(const OperationIndex &ind, Operation &node)
+void PermutationEliminationPass::callback(const ir::OperationIndex &ind, ir::Operation &node)
{
_op_ind = ind;
node.accept(*this);
};
-void PermutationEliminationPass::visit(const operation::Permute &node)
+void PermutationEliminationPass::visit(const ir::operation::Permute &node)
{
auto in_operand = node.getInputs().at(0);
auto out_operand = node.getOutputs().at(0);
- // Check if two tensors are both portable
- // TODO Make this general, this is just a workaround to check two tensors are portable
+ // Check if two tensors are both portable; if not, we can't eliminate the node
{
auto in_def_factor = _lowered_graph.getLowerInfo(in_operand)->def_factors().getOnlyElement();
auto out_def_factor = _lowered_graph.getLowerInfo(out_operand)->def_factors().getOnlyElement();
- auto in_backend_id = in_def_factor.backend()->config()->id();
- auto out_backend_id = out_def_factor.backend()->config()->id();
+ auto in_config = in_def_factor.backend()->config();
+ auto out_config = out_def_factor.backend()->config();
- // TODO Fix this workaround that removes only Permute between cpu and controlflow backend.
- // This should be general.
- if (!((in_backend_id == backend::controlflow::Config::ID && out_backend_id == "cpu") ||
- (in_backend_id == "cpu" && out_backend_id == backend::controlflow::Config::ID)))
+ // FIXME Supporting dynamic tensor does not exactly mean those are portable.
+ // It may need to have another config option for checking if each uses `IPortableTensor`.
+ if (!(in_config->supportDynamicTensor() && out_config->supportDynamicTensor()))
return;
}
@@ -65,7 +63,7 @@ void PermutationEliminationPass::visit(const operation::Permute &node)
if (!op_seq.getOutputs().contains(in_operand))
return;
- // Update OpSequence/Operation edges and Operand edges
+ // Update OpSequence/ir::Operation edges and ir::Operand edges
op_seq.replaceOutputs(in_operand, out_operand);
for (auto op : op_seq.operations())
{
@@ -106,8 +104,8 @@ void PermutationEliminationPass::visit(const operation::Permute &node)
});
VERBOSE(removePermute) << "Permute Op removed, node index : " << _op_ind << std::endl;
- VERBOSE(removePermute) << " - Input (removed) Operand : " << in_operand << std::endl;
- VERBOSE(removePermute) << " - Output(kept) Operand : " << out_operand << std::endl;
+ VERBOSE(removePermute) << " - Input (removed) ir::Operand : " << in_operand << std::endl;
+ VERBOSE(removePermute) << " - Output(kept) ir::Operand : " << out_operand << std::endl;
}
else
{
@@ -145,11 +143,11 @@ void PermutationEliminationPass::visit(const operation::Permute &node)
}
VERBOSE(removePermute) << "Permute Op removed, node index : " << _op_ind << std::endl;
- VERBOSE(removePermute) << " - Input (kept) Operand : " << in_operand << std::endl;
- VERBOSE(removePermute) << " - Output(removed) Operand : " << out_operand << std::endl;
+ VERBOSE(removePermute) << " - Input (kept) ir::Operand : " << in_operand << std::endl;
+ VERBOSE(removePermute) << " - Output(removed) ir::Operand : " << out_operand << std::endl;
}
}
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
diff --git a/runtime/onert/core/src/ir/pass/PermutationEliminationPass.h b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.h
index 614e44cd2..29daf1a82 100644
--- a/runtime/onert/core/src/ir/pass/PermutationEliminationPass.h
+++ b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.h
@@ -14,15 +14,15 @@
* limitations under the License.
*/
-#ifndef __ONERT_GRAPH_PASS_PERMUTATION_ELIMINATION_PASS_H__
-#define __ONERT_GRAPH_PASS_PERMUTATION_ELIMINATION_PASS_H__
+#ifndef __ONERT_COMPILER_PASS_PERMUTATION_ELIMINATION_PASS_H__
+#define __ONERT_COMPILER_PASS_PERMUTATION_ELIMINATION_PASS_H__
#include "ir/OperationVisitor.h"
#include "LoweredOperationPass.h"
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
@@ -40,7 +40,7 @@ namespace pass
* @note This is an optimization pass which means that everything should work fine even if this pass
* was skipped.
*/
-class PermutationEliminationPass : public LoweredOperationPass, public OperationVisitor
+class PermutationEliminationPass : public LoweredOperationPass, public ir::OperationVisitor
{
public:
using LoweredOperationPass::LoweredOperationPass;
@@ -49,17 +49,17 @@ public:
std::string id() final { return "PermutationEliminationPass"; }
public:
- void callback(const OperationIndex &i, Operation &n) final;
+ void callback(const ir::OperationIndex &i, ir::Operation &n) final;
private:
- void visit(const operation::Permute &) final;
+ void visit(const ir::operation::Permute &) final;
private:
ir::OperationIndex _op_ind;
};
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
-#endif // __ONERT_GRAPH_PASS_PERMUTATION_ELIMINATION_PASS_H__
+#endif // __ONERT_COMPILER_PASS_PERMUTATION_ELIMINATION_PASS_H__
diff --git a/runtime/onert/core/src/ir/pass/PermutationInsertionPass.cc b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc
index 3578af813..c83a72ada 100644
--- a/runtime/onert/core/src/ir/pass/PermutationInsertionPass.cc
+++ b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc
@@ -31,12 +31,12 @@
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
-void PermutationInsertionPass::callback(const OperandIndex &index, Operand &object)
+void PermutationInsertionPass::callback(const ir::OperandIndex &index, ir::Operand &object)
{
auto &&operand_li = _lowered_graph.getLowerInfo(index);
assert(operand_li);
@@ -48,10 +48,10 @@ void PermutationInsertionPass::callback(const OperandIndex &index, Operand &obje
return;
}
- std::list<OperationIndex> permute_indexes;
+ std::list<ir::OperationIndex> permute_indexes;
// Build a map for all necessary type of operands
- std::unordered_map<operand::PermuteFactor, OperandIndex> factor_to_index;
+ std::unordered_map<ir::operand::PermuteFactor, ir::OperandIndex> factor_to_index;
{
assert(operand_li->def_factors().size() == 1);
for (auto factor : operand_li->def_factors())
@@ -72,7 +72,7 @@ void PermutationInsertionPass::callback(const OperandIndex &index, Operand &obje
// Update operations' input that uses this operand
{
- std::list<OperationIndex> remove_list;
+ std::list<ir::OperationIndex> remove_list;
auto uses = object.getUses();
for (auto use : uses)
@@ -121,8 +121,8 @@ void PermutationInsertionPass::callback(const OperandIndex &index, Operand &obje
}
}
-OperationIndex PermutationInsertionPass::insertPermute(const OperandIndex &operand_index,
- const operand::PermuteFactor &factor)
+ir::OperationIndex PermutationInsertionPass::insertPermute(const ir::OperandIndex &operand_index,
+ const ir::operand::PermuteFactor &factor)
{
assert(!_graph.isBuildingPhase());
@@ -143,14 +143,14 @@ OperationIndex PermutationInsertionPass::insertPermute(const OperandIndex &opera
auto output_backend = factor.backend();
// NOTE Permute may not have specific layout because the layout of input and output may be
// different.
- const auto permute_node_layout = Layout::UNKNOWN;
+ const auto permute_node_layout = ir::Layout::UNKNOWN;
// NOTE If one backend supports several layouts, the backend must support the Permute operation
const backend::Backend *permute_node_backend = compiler::BackendManager::get().getControlflow();
if (input_backend == output_backend)
{
permute_node_backend = input_backend;
}
- const operand::PermuteFactor permute_node_factor{permute_node_backend, permute_node_layout};
+ const ir::operand::PermuteFactor permute_node_factor{permute_node_backend, permute_node_layout};
// Update LowerInfo of input operand
auto operand_lower_info = _lowered_graph.getLowerInfo(operand_index);
@@ -158,7 +158,7 @@ OperationIndex PermutationInsertionPass::insertPermute(const OperandIndex &opera
operand_lower_info->addUsePermuteFactor(permute_node_factor);
// Update LowerInfo of output operand
- auto out_operand_li = std::make_unique<operand::LowerInfo>();
+ auto out_operand_li = std::make_unique<ir::operand::LowerInfo>();
// The input and output factors of all nodes except Permute will be the same. So Tensor's
// allocators allocate memory using only the information of the def permutation factor now.
@@ -170,13 +170,13 @@ OperationIndex PermutationInsertionPass::insertPermute(const OperandIndex &opera
// Insert permute operation to the graph
const auto input_layout = input_factor.layout();
const auto output_layout = factor.layout();
- using Permute = operation::Permute;
+ using Permute = ir::operation::Permute;
const auto permute_type = [&]() {
- if (input_layout == Layout::NHWC && output_layout == Layout::NCHW)
+ if (input_layout == ir::Layout::NHWC && output_layout == ir::Layout::NCHW)
{
return Permute::Type::NHWC_TO_NCHW;
}
- else if (input_layout == Layout::NCHW && output_layout == Layout::NHWC)
+ else if (input_layout == ir::Layout::NCHW && output_layout == ir::Layout::NHWC)
{
return Permute::Type::NCHW_TO_NHWC;
}
@@ -200,7 +200,7 @@ OperationIndex PermutationInsertionPass::insertPermute(const OperandIndex &opera
auto &op_seq = _lowered_graph.op_seqs().at(op_seq_index);
op_seq.setInputs(node.getInputs());
op_seq.setOutputs(node.getOutputs());
- _lowered_graph.setLowerInfo(op_seq_index, std::make_unique<operation::LowerInfo>(
+ _lowered_graph.setLowerInfo(op_seq_index, std::make_unique<ir::operation::LowerInfo>(
permute_node_backend, permute_node_layout));
}
@@ -212,5 +212,5 @@ OperationIndex PermutationInsertionPass::insertPermute(const OperandIndex &opera
return node_index;
}
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
diff --git a/runtime/onert/core/src/ir/pass/PermutationInsertionPass.h b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.h
index 6c30c6f12..758515385 100644
--- a/runtime/onert/core/src/ir/pass/PermutationInsertionPass.h
+++ b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.h
@@ -14,17 +14,17 @@
* limitations under the License.
*/
-#ifndef __ONERT_GRAPH_PASS_PERMUTATION_INSERTION_PASS_H__
-#define __ONERT_GRAPH_PASS_PERMUTATION_INSERTION_PASS_H__
+#ifndef __ONERT_COMPILER_PASS_PERMUTATION_INSERTION_PASS_H__
+#define __ONERT_COMPILER_PASS_PERMUTATION_INSERTION_PASS_H__
#include "LoweredOperandPass.h"
#include "compiler/BackendManager.h"
-#include "ir/Operand.h" //for OperationIndex
+#include "ir/Operand.h"
#include "ir/operand/PermuteFactor.h"
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
@@ -36,7 +36,7 @@ public:
public:
std::string id() override { return "PermutationInsertionPass"; }
- void callback(const OperandIndex &index, Operand &object) override;
+ void callback(const ir::OperandIndex &index, ir::Operand &object) override;
private:
/**
@@ -45,14 +45,14 @@ private:
* @param operand_index is the target operand index for the insertion
* @param factor is the output operand's backend type and layout
*
- * @return OperationIndex
+ * @return ir::OperationIndex
*/
- OperationIndex insertPermute(const OperandIndex &operand_index,
- const operand::PermuteFactor &factor);
+ ir::OperationIndex insertPermute(const ir::OperandIndex &operand_index,
+ const ir::operand::PermuteFactor &factor);
};
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
-#endif // __ONERT_GRAPH_PASS_PERMUTATION_INSERTION_PASS_H__
+#endif // __ONERT_COMPILER_PASS_PERMUTATION_INSERTION_PASS_H__
diff --git a/runtime/onert/core/src/ir/pass/PermutationOperationPass.cc b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.cc
index 6eb412cf1..c5c95c726 100644
--- a/runtime/onert/core/src/ir/pass/PermutationOperationPass.cc
+++ b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.cc
@@ -23,11 +23,13 @@
namespace onert
{
-namespace ir
+namespace compiler
{
namespace pass
{
+using namespace ir;
+
void PermutationOperationPass::callback(const OperationIndex &, Operation &node)
{
node.accept(*this);
@@ -70,7 +72,7 @@ void PermutationOperationPass::applyExpandRanks(const Operation &node)
"operand used in more than one node");
// TODO remove const_cast later. For example, _ctx may need to be a non const variable or
// a node to extend shape may be inserted in front of this operation
- const_cast<ir::Shape &>(operand.shape()).extendRank(expanded_rank);
+ const_cast<Shape &>(operand.shape()).extendRank(expanded_rank);
}
}
}
@@ -134,7 +136,7 @@ void PermutationOperationPass::changeToKeepLayout(const Operation &node)
const auto op_seq_li = _lowered_graph.getLowerInfo(op_seq_index);
_lowered_graph.setLowerInfo(
next_op_seq_index,
- std::make_unique<operation::LowerInfo>(op_seq_li->backend(), op_seq_li->layout()));
+ std::make_unique<ir::operation::LowerInfo>(op_seq_li->backend(), op_seq_li->layout()));
}
}
@@ -164,8 +166,8 @@ void PermutationOperationPass::changeToKeepLayout(const Operation &node)
auto &new_op_seq = _lowered_graph.op_seqs().at(new_op_seq_index);
new_op_seq.setInputs(node.getInputs());
new_op_seq.setOutputs(node.getOutputs());
- _lowered_graph.setLowerInfo(new_op_seq_index,
- std::make_unique<operation::LowerInfo>(backend, frontend_layout));
+ _lowered_graph.setLowerInfo(
+ new_op_seq_index, std::make_unique<ir::operation::LowerInfo>(backend, frontend_layout));
}
// Change PermuteFactors of operands of target node
@@ -175,7 +177,7 @@ void PermutationOperationPass::changeToKeepLayout(const Operation &node)
const auto backend = op_seq_li->backend();
const operand::PermuteFactor removed_factor{backend, backend_layout};
const operand::PermuteFactor new_factor{backend, frontend_layout};
- for (const auto &input : node.getInputs() | Remove::DUPLICATED | ir::Remove::UNDEFINED)
+ for (const auto &input : node.getInputs() | Remove::DUPLICATED | Remove::UNDEFINED)
{
bool canRemove = true;
for (const auto &use : _graph.operands().at(input).getUses())
@@ -227,17 +229,31 @@ void PermutationOperationPass::changeToKeepLayout(const Operation &node)
}
}
-void PermutationOperationPass::visit(const operation::Add &node) { applyExpandRanks(node); }
+void PermutationOperationPass::visit(const ir::operation::BinaryArithmetic &node)
+{
+ applyExpandRanks(node);
+}
-void PermutationOperationPass::visit(const operation::Concat &node) { applyExpandRanks(node); }
+void PermutationOperationPass::visit(const ir::operation::Concat &node) { applyExpandRanks(node); }
-void PermutationOperationPass::visit(const operation::Comparison &node) { applyExpandRanks(node); }
+void PermutationOperationPass::visit(const ir::operation::Comparison &node)
+{
+ applyExpandRanks(node);
+}
-void PermutationOperationPass::visit(const operation::Div &node) { applyExpandRanks(node); }
+void PermutationOperationPass::visit(const ir::operation::ElementwiseBinary &node)
+{
+ applyExpandRanks(node);
+}
-void PermutationOperationPass::visit(const operation::FullyConnected &node)
+void PermutationOperationPass::visit(const ir::operation::ElementwiseUnary &node)
{
- const auto &input_ind = node.getInputs().at(operation::FullyConnected::Input::INPUT);
+ applyExpandRanks(node);
+}
+
+void PermutationOperationPass::visit(const ir::operation::FullyConnected &node)
+{
+ const auto &input_ind = node.getInputs().at(ir::operation::FullyConnected::Input::INPUT);
const auto &input_obj = _graph.operands().at(input_ind);
const auto &input_shape = input_obj.shape();
@@ -247,9 +263,9 @@ void PermutationOperationPass::visit(const operation::FullyConnected &node)
}
}
-void PermutationOperationPass::visit(const operation::Gather &node)
+void PermutationOperationPass::visit(const ir::operation::Gather &node)
{
- const auto &input_ind = node.getInputs().at(operation::Gather::Input::INPUT);
+ const auto &input_ind = node.getInputs().at(ir::operation::Gather::Input::INPUT);
const auto &input_obj = _graph.operands().at(input_ind);
const auto &input_shape = input_obj.shape();
@@ -263,21 +279,9 @@ void PermutationOperationPass::visit(const operation::Gather &node)
}
}
-void PermutationOperationPass::visit(const operation::LogicalAnd &node) { applyExpandRanks(node); }
-
-void PermutationOperationPass::visit(const operation::LogicalNot &node) { applyExpandRanks(node); }
-
-void PermutationOperationPass::visit(const operation::LogicalOr &node) { applyExpandRanks(node); }
-
-void PermutationOperationPass::visit(const operation::Max &node) { applyExpandRanks(node); }
-
-void PermutationOperationPass::visit(const operation::Min &node) { applyExpandRanks(node); }
-
-void PermutationOperationPass::visit(const operation::Mul &node) { applyExpandRanks(node); }
-
-void PermutationOperationPass::visit(const operation::Pack &node)
+void PermutationOperationPass::visit(const ir::operation::Pack &node)
{
- const auto &input_ind = node.getInputs().at(operation::Reshape::Input::INPUT);
+ const auto &input_ind = node.getInputs().at(ir::operation::Reshape::Input::INPUT);
const auto &input_obj = _graph.operands().at(input_ind);
const auto &input_shape = input_obj.shape();
@@ -291,11 +295,11 @@ void PermutationOperationPass::visit(const operation::Pack &node)
}
}
-void PermutationOperationPass::visit(const operation::PReLU &node) { applyExpandRanks(node); }
+void PermutationOperationPass::visit(const ir::operation::PReLU &node) { applyExpandRanks(node); }
-void PermutationOperationPass::visit(const operation::Reshape &node)
+void PermutationOperationPass::visit(const ir::operation::Reshape &node)
{
- const auto &input_ind = node.getInputs().at(operation::Reshape::Input::INPUT);
+ const auto &input_ind = node.getInputs().at(ir::operation::Reshape::Input::INPUT);
const auto &input_obj = _graph.operands().at(input_ind);
const auto &input_shape = input_obj.shape();
@@ -309,16 +313,14 @@ void PermutationOperationPass::visit(const operation::Reshape &node)
}
}
-void PermutationOperationPass::visit(const operation::SquaredDifference &node)
+void PermutationOperationPass::visit(const ir::operation::SquaredDifference &node)
{
applyExpandRanks(node);
}
-void PermutationOperationPass::visit(const operation::Sub &node) { applyExpandRanks(node); }
-
-void PermutationOperationPass::visit(const operation::Unpack &node)
+void PermutationOperationPass::visit(const ir::operation::Unpack &node)
{
- const auto &input_ind = node.getInputs().at(operation::Reshape::Input::INPUT);
+ const auto &input_ind = node.getInputs().at(ir::operation::Reshape::Input::INPUT);
const auto &input_obj = _graph.operands().at(input_ind);
const auto &input_shape = input_obj.shape();
@@ -333,5 +335,5 @@ void PermutationOperationPass::visit(const operation::Unpack &node)
}
} // namespace pass
-} // namespace ir
+} // namespace compiler
} // namespace onert
diff --git a/runtime/onert/core/src/compiler/pass/PermutationOperationPass.h b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.h
new file mode 100644
index 000000000..2dd76b971
--- /dev/null
+++ b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_PASS_PERMUTATION_OPERATION_PASS_H__
+#define __ONERT_COMPILER_PASS_PERMUTATION_OPERATION_PASS_H__
+
+#include "ir/OperationVisitor.h"
+#include "LoweredOperationPass.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace pass
+{
+
+class PermutationOperationPass : public LoweredOperationPass, public ir::OperationVisitor
+{
+public:
+ using LoweredOperationPass::LoweredOperationPass;
+
+public:
+ std::string id() final { return "PermutationOperationPass"; }
+
+public:
+ void callback(const ir::OperationIndex &i, ir::Operation &n) final;
+
+public:
+ void visit(const ir::operation::BinaryArithmetic &) final;
+ void visit(const ir::operation::Comparison &) final;
+ void visit(const ir::operation::Concat &) final;
+ void visit(const ir::operation::ElementwiseBinary &) final;
+ void visit(const ir::operation::ElementwiseUnary &) final;
+ void visit(const ir::operation::Pack &) final;
+ void visit(const ir::operation::PReLU &) final;
+ void visit(const ir::operation::SquaredDifference &) final;
+ void visit(const ir::operation::Unpack &) final;
+ void visit(const ir::operation::FullyConnected &) final;
+ void visit(const ir::operation::Gather &) final;
+ void visit(const ir::operation::Reshape &) final;
+
+private:
+ void applyExpandRanks(const ir::Operation &);
+ void changeToKeepLayout(const ir::Operation &);
+};
+
+} // namespace pass
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_PASS_PERMUTATION_OPERATION_PASS_H__
diff --git a/runtime/onert/core/src/dumper/dot/DotDumper.h b/runtime/onert/core/src/dumper/dot/DotDumper.h
index 668785a81..fdbca1642 100644
--- a/runtime/onert/core/src/dumper/dot/DotDumper.h
+++ b/runtime/onert/core/src/dumper/dot/DotDumper.h
@@ -15,7 +15,7 @@
*/
#include "ir/Graph.h"
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
#ifndef __ONERT_DUMPER_DOT_DOT_DUMPER_H__
#define __ONERT_DUMPER_DOT_DOT_DUMPER_H__
@@ -42,7 +42,7 @@ public:
: _lowered_graph{nullptr}, _graph(graph), _level{level}
{
}
- DotDumper(const ir::LoweredGraph *lowered_graph, Level level)
+ DotDumper(const compiler::LoweredGraph *lowered_graph, Level level)
: _lowered_graph{lowered_graph}, _graph(_lowered_graph->graph()), _level{level}
{
}
@@ -57,7 +57,7 @@ public:
void dump(const std::string &tag);
private:
- const ir::LoweredGraph *_lowered_graph;
+ const compiler::LoweredGraph *_lowered_graph;
const ir::Graph &_graph;
Level _level;
};
diff --git a/runtime/onert/core/src/exec/DataflowExecutor.cc b/runtime/onert/core/src/exec/DataflowExecutor.cc
index cb516b53a..a69ae9cdb 100644
--- a/runtime/onert/core/src/exec/DataflowExecutor.cc
+++ b/runtime/onert/core/src/exec/DataflowExecutor.cc
@@ -78,11 +78,13 @@ bool DataflowExecutor::noWaitingJobs()
}
DataflowExecutor::DataflowExecutor(
- std::unique_ptr<ir::LoweredGraph> lowered_graph,
+ std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorBuilders &tensor_builders, compiler::CodeMap &&code_map)
- : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_builders},
+ const compiler::TensorRegistries &tensor_regs, backend::TensorManagerSet &&tensor_mgrs,
+ compiler::CodeMap &&code_map)
+ : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
+ std::move(tensor_mgrs)},
_code_map{std::move(code_map)}
{
VERBOSE(DataflowExecutor) << "Constructing Dataflow Executor" << std::endl;
diff --git a/runtime/onert/core/src/exec/DataflowExecutor.h b/runtime/onert/core/src/exec/DataflowExecutor.h
index aebb03c23..8d60e3e4b 100644
--- a/runtime/onert/core/src/exec/DataflowExecutor.h
+++ b/runtime/onert/core/src/exec/DataflowExecutor.h
@@ -49,10 +49,11 @@ public:
* @param tensor_builders Tensor builders that are currently used
* @param code_map OpSequence and its code map
*/
- DataflowExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+ DataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorBuilders &tensor_builders, compiler::CodeMap &&code_map);
+ const compiler::TensorRegistries &tensor_regs,
+ backend::TensorManagerSet &&tensor_mgrs, compiler::CodeMap &&code_map);
void executeImpl() override;
diff --git a/runtime/onert/core/src/exec/DynamicShapeInference.cc b/runtime/onert/core/src/exec/DynamicShapeInference.cc
index 5ec7012ee..70bddfce4 100644
--- a/runtime/onert/core/src/exec/DynamicShapeInference.cc
+++ b/runtime/onert/core/src/exec/DynamicShapeInference.cc
@@ -100,17 +100,6 @@ void DynamicShapeInferer::handleSimpleUnaryOp(const ir::Operation &op,
assert(output->buffer() != nullptr);
}
-void DynamicShapeInferer::visit(const ir::operation::Abs &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Abs::INPUT));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Add &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Add::Input::LHS),
- op.getInputs().at(ir::operation::Add::Input::RHS));
-}
-
void DynamicShapeInferer::visit(const ir::operation::ArgMax &op)
{
const auto input_idx{op.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
@@ -155,6 +144,12 @@ void DynamicShapeInferer::visit(const ir::operation::BatchMatMul &op)
dynamicTensorManagerOf(output)->applyShape(output_index, new_shape);
}
+void DynamicShapeInferer::visit(const ir::operation::BinaryArithmetic &op)
+{
+ handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS),
+ op.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS));
+}
+
void DynamicShapeInferer::visit(const ir::operation::BroadcastTo &op)
{
auto output_ind = op.getOutputs().at(0);
@@ -179,11 +174,6 @@ void DynamicShapeInferer::visit(const ir::operation::BroadcastTo &op)
assert(output->buffer() != nullptr);
}
-void DynamicShapeInferer::visit(const ir::operation::Cast &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Cast::INPUT));
-}
-
void DynamicShapeInferer::visit(const ir::operation::Comparison &op)
{
handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Comparison::Input::INPUT0),
@@ -292,20 +282,20 @@ void DynamicShapeInferer::visit(const ir::operation::Conv2D &op)
assert(output->buffer() != nullptr);
}
-void DynamicShapeInferer::visit(const ir::operation::Cos &op)
+void DynamicShapeInferer::visit(const ir::operation::ElementwiseActivation &op)
{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Cos::Input::INPUT));
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseActivation::INPUT));
}
-void DynamicShapeInferer::visit(const ir::operation::Div &op)
+void DynamicShapeInferer::visit(const ir::operation::ElementwiseBinary &op)
{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Div::Input::LHS),
- op.getInputs().at(ir::operation::Div::Input::RHS));
+ handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS),
+ op.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS));
}
-void DynamicShapeInferer::visit(const ir::operation::Exp &op)
+void DynamicShapeInferer::visit(const ir::operation::ElementwiseUnary &op)
{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Exp::Input::INPUT));
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT));
}
void DynamicShapeInferer::visit(const ir::operation::ExpandDims &op)
@@ -430,27 +420,6 @@ void DynamicShapeInferer::visit(const ir::operation::Gather &op)
assert(output->buffer() != nullptr);
}
-void DynamicShapeInferer::visit(const ir::operation::Log &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Log::Input::INPUT));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::LogicalNot &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::LogicalNot::Input::INPUT));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::LogicalOr &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::LogicalOr::Input::INPUT0),
- op.getInputs().at(ir::operation::LogicalOr::Input::INPUT1));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Logistic &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Logistic::INPUT));
-}
-
void DynamicShapeInferer::visit(const ir::operation::L2Normalization &op)
{
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::L2Normalization::INPUT));
@@ -461,29 +430,6 @@ void DynamicShapeInferer::visit(const ir::operation::MatrixBandPart &op)
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::MatrixBandPart::INPUT));
}
-void DynamicShapeInferer::visit(const ir::operation::Max &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Max::Input::LHS),
- op.getInputs().at(ir::operation::Max::Input::RHS));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Min &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Min::Input::LHS),
- op.getInputs().at(ir::operation::Min::Input::RHS));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Mul &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Mul::Input::LHS),
- op.getInputs().at(ir::operation::Mul::Input::RHS));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Neg &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Neg::Input::INPUT));
-}
-
void DynamicShapeInferer::visit(const ir::operation::OneHot &op)
{
auto output_ind = op.getOutputs().at(0);
@@ -766,7 +712,7 @@ void DynamicShapeInferer::visit(const ir::operation::ResizeBilinear &op)
if (output_shape != output->getShape() || output->buffer() == nullptr)
{
// change on output shape
- _dynamic_tensor_manager->applyShape(output_ind, output_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
}
assert(output->buffer() != nullptr);
}
@@ -776,16 +722,6 @@ void DynamicShapeInferer::visit(const ir::operation::Reverse &op)
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Reverse::INPUT));
}
-void DynamicShapeInferer::visit(const ir::operation::Round &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Round::Input::INPUT));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::RSQRT &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::RSQRT::INPUT));
-}
-
void DynamicShapeInferer::visit(const ir::operation::Select &op)
{
const auto input_cond_idx = op.getInputs().at(ir::operation::Select::Input::CONDITION);
@@ -836,11 +772,6 @@ void DynamicShapeInferer::visit(const ir::operation::Shape &op)
assert(output->buffer() != nullptr);
}
-void DynamicShapeInferer::visit(const ir::operation::Sin &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Sin::Input::INPUT));
-}
-
void DynamicShapeInferer::visit(const ir::operation::Slice &op)
{
const auto input_index{op.getInputs().at(ir::operation::Slice::Input::INPUT)};
@@ -1003,17 +934,6 @@ void DynamicShapeInferer::visit(const ir::operation::StridedSlice &op)
assert(output->buffer() != nullptr);
}
-void DynamicShapeInferer::visit(const ir::operation::Sub &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Sub::Input::LHS),
- op.getInputs().at(ir::operation::Sub::Input::RHS));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Tanh &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Tanh::INPUT));
-}
-
void DynamicShapeInferer::visit(const ir::operation::Tile &op)
{
auto output_ind = op.getOutputs().at(0);
@@ -1091,10 +1011,5 @@ void DynamicShapeInferer::visit(const ir::operation::Unpack &op)
}
}
-void DynamicShapeInferer::visit(const ir::operation::ZerosLike &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ZerosLike::INPUT));
-}
-
} // namespace exec
} // namespace onert
diff --git a/runtime/onert/core/src/exec/Execution.cc b/runtime/onert/core/src/exec/Execution.cc
index 5b401ecf8..7feb3ab68 100644
--- a/runtime/onert/core/src/exec/Execution.cc
+++ b/runtime/onert/core/src/exec/Execution.cc
@@ -38,7 +38,10 @@ void Execution::changeInputShape(const ir::IOIndex &index, const ir::Shape &new_
if (_io_desc.inputs.at(index.value()) != 0)
throw std::runtime_error("Error in calling order");
- _io_desc.input_shape_signature[index] = new_shape;
+ // This will be used later to set the input tensor as dynamic
+ // Note that the 'compiled' model will not be updated with new_shape,
+ // but new_shape will change the model's input shape while 'running' the model
+ _io_desc.dynamic_input_shapes[index] = new_shape;
}
// TODO Remove default parameter
@@ -54,8 +57,8 @@ void Execution::setInput(const ir::IOIndex &index, const void *buffer, size_t le
// if input_shape_sig is set, input_shape_sig overrides shape in info
// note: input_shape_sig contains shape passed by nnfw_set_input_tensorinfo()
{
- auto input_shape_sig = _io_desc.input_shape_signature.find(index);
- auto size_required = (input_shape_sig != _io_desc.input_shape_signature.end())
+ auto input_shape_sig = _io_desc.dynamic_input_shapes.find(index);
+ auto size_required = (input_shape_sig != _io_desc.dynamic_input_shapes.end())
? input_shape_sig->second.num_elements() *
onert::ir::sizeOfDataType(info.typeInfo().type())
: info.total_size();
@@ -154,8 +157,8 @@ bool Execution::isFinished(void) const { return finished; }
ir::Shape Execution::getInputShape(ir::IOIndex ind) const
{
- auto itr = _io_desc.input_shape_signature.find(ind);
- if (itr == _io_desc.input_shape_signature.end())
+ auto itr = _io_desc.dynamic_input_shapes.find(ind);
+ if (itr == _io_desc.dynamic_input_shapes.end())
{
auto operand_idx = primary_subgraph().getInputs().at(ind.value());
return primary_subgraph().operands().at(operand_idx).shape();
diff --git a/runtime/onert/core/src/exec/ExecutorBase.cc b/runtime/onert/core/src/exec/ExecutorBase.cc
index 864ccb31a..f835a9675 100644
--- a/runtime/onert/core/src/exec/ExecutorBase.cc
+++ b/runtime/onert/core/src/exec/ExecutorBase.cc
@@ -26,12 +26,14 @@ namespace onert
namespace exec
{
-ExecutorBase::ExecutorBase(std::unique_ptr<ir::LoweredGraph> &&lowered_graph,
+ExecutorBase::ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph,
const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorBuilders &tensor_builders)
+ const compiler::TensorRegistries &tensor_regs,
+ backend::TensorManagerSet &&tensor_mgrs)
: _lowered_graph{std::move(lowered_graph)}, _graph{_lowered_graph->graph()},
- _input_tensors{input_tensors}, _output_tensors{output_tensors}, _mutex()
+ _input_tensors{input_tensors}, _output_tensors{output_tensors},
+ _tensor_mgrs{std::move(tensor_mgrs)}, _mutex()
{
// TODO Fix the way of knowing whether it is primary or not
bool primary_executor = !(_input_tensors.empty() && _output_tensors.empty());
@@ -41,23 +43,10 @@ ExecutorBase::ExecutorBase(std::unique_ptr<ir::LoweredGraph> &&lowered_graph,
std::vector<std::shared_ptr<backend::ITensor>> list;
for (auto ind : ind_seq)
{
- std::shared_ptr<backend::ITensor> tensor;
- for (auto &tensor_builder : tensor_builders)
- {
- auto tensor_registry = tensor_builder->tensorRegistry();
- assert(tensor_registry);
- tensor = tensor_registry->getNativeITensor(ind);
- if (tensor != nullptr)
- {
- if (tensor_builder->supportDynamicTensor())
- {
- DynAllocInfo dyn_alloc_info{ind, tensor_builder->dynamicTensorManager()};
- _input_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
- }
- break;
- }
- }
+ std::shared_ptr<backend::ITensor> tensor = tensor_regs.getITensor(ind);
assert(tensor != nullptr);
+ DynAllocInfo dyn_alloc_info{ind};
+ _input_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
list.push_back(tensor);
}
return list;
@@ -66,23 +55,10 @@ ExecutorBase::ExecutorBase(std::unique_ptr<ir::LoweredGraph> &&lowered_graph,
std::vector<std::shared_ptr<backend::ITensor>> list;
for (auto ind : ind_seq)
{
- std::shared_ptr<backend::ITensor> tensor;
- for (auto &tensor_builder : tensor_builders)
- {
- auto tensor_registry = tensor_builder->tensorRegistry();
- assert(tensor_registry);
- tensor = tensor_registry->getNativeITensor(ind);
- if (tensor != nullptr)
- {
- if (tensor_builder->supportDynamicTensor())
- {
- DynAllocInfo dyn_alloc_info{ind, tensor_builder->dynamicTensorManager()};
- _output_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
- }
- break;
- }
- }
+ std::shared_ptr<backend::ITensor> tensor = tensor_regs.getITensor(ind);
assert(tensor != nullptr);
+ DynAllocInfo dyn_alloc_info{ind};
+ _output_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
list.push_back(tensor);
}
return list;
@@ -92,42 +68,23 @@ ExecutorBase::ExecutorBase(std::unique_ptr<ir::LoweredGraph> &&lowered_graph,
}
else
{
- // If primary graph, all the inputs and outputs belong to controlflow backend
- auto cf_dyn_tensor_builder = tensor_builders.getControlflowTensorBuilder();
- assert(cf_dyn_tensor_builder);
-
assert(input_tensors.size() == _graph.getInputs().size());
assert(output_tensors.size() == _graph.getOutputs().size());
for (uint32_t i = 0; i < input_tensors.size(); i++)
{
auto tensor = input_tensors[i];
auto ind = _graph.getInputs().at(i);
- DynAllocInfo dyn_alloc_info{ind, cf_dyn_tensor_builder->dynamicTensorManager()};
+ DynAllocInfo dyn_alloc_info{ind};
_input_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
}
for (uint32_t i = 0; i < output_tensors.size(); i++)
{
auto tensor = output_tensors[i];
auto ind = _graph.getOutputs().at(i);
- DynAllocInfo dyn_alloc_info{ind, cf_dyn_tensor_builder->dynamicTensorManager()};
+ DynAllocInfo dyn_alloc_info{ind};
_output_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
}
}
-
- // Prepare each TensorManager on each backend
- for (auto &tensor_builder : tensor_builders)
- {
- auto s_tensor_manager = tensor_builder->releaseStaticTensorManager();
- if (s_tensor_manager != nullptr)
- _tensor_mgrs.insert(std::move(s_tensor_manager));
-
- if (tensor_builder->supportDynamicTensor())
- {
- auto d_tensor_manager = tensor_builder->releaseDynamicTensorManager();
- if (d_tensor_manager != nullptr)
- _tensor_mgrs.insert(std::move(d_tensor_manager));
- }
- }
}
void ExecutorBase::execute(const std::vector<std::shared_ptr<backend::ITensor>> &src_tensors,
@@ -192,8 +149,8 @@ void ExecutorBase::execute(const IODescription &desc)
// TODO Remove dynamic_cast
auto tensor = std::dynamic_pointer_cast<backend::controlflow::UserTensor>(_input_tensors[i]);
assert(tensor);
- auto input_shape = desc.input_shape_signature.find(ir::IOIndex{i});
- if (input_shape != desc.input_shape_signature.end())
+ auto input_shape = desc.dynamic_input_shapes.find(ir::IOIndex{i});
+ if (input_shape != desc.dynamic_input_shapes.end())
{
tensor->set_dynamic();
tensor->setShape(input_shape->second);
@@ -258,8 +215,8 @@ void ExecutorBase::execute(const IODescription &desc)
*/
void ExecutorBase::handleDynamicInputTensor(ir::IOIndex io_ind, const IODescription &desc)
{
- auto shape_sig_found = desc.input_shape_signature.find(io_ind);
- if (shape_sig_found != desc.input_shape_signature.end())
+ auto shape_sig_found = desc.dynamic_input_shapes.find(io_ind);
+ if (shape_sig_found != desc.dynamic_input_shapes.end())
{
auto dyn_alloc_info = _input_to_dyn_alloc_info.find(_input_tensors[io_ind.value()]);
if (dyn_alloc_info == _input_to_dyn_alloc_info.end())
@@ -269,7 +226,9 @@ void ExecutorBase::handleDynamicInputTensor(ir::IOIndex io_ind, const IODescript
auto changed_input_shape = shape_sig_found->second;
auto operand_ind = dyn_alloc_info->second.ind;
- dyn_alloc_info->second.dyn_tensor_manager->applyShape(operand_ind, changed_input_shape);
+ auto dyn_tensor_manager = _input_tensors[io_ind.value()]->dynamic_tensor_manager();
+ assert(dyn_tensor_manager);
+ dyn_tensor_manager->applyShape(operand_ind, changed_input_shape);
}
}
diff --git a/runtime/onert/core/src/exec/ExecutorBase.h b/runtime/onert/core/src/exec/ExecutorBase.h
index 080c9bbdd..a13be7dbf 100644
--- a/runtime/onert/core/src/exec/ExecutorBase.h
+++ b/runtime/onert/core/src/exec/ExecutorBase.h
@@ -25,7 +25,7 @@
#include "Sink.h"
#include "ShapeConverter.h"
#include "exec/IExecutor.h"
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
#include "ir/LowerInfoMap.h"
#include "backend/IConfig.h"
#include "backend/Backend.h"
@@ -33,9 +33,8 @@
#include "exec/IFunction.h"
#include "backend/IDynamicTensorManager.h"
#include "backend/ITensorManager.h"
-#include "backend/ITensorBuilder.h"
#include "exec/ExecutionObservee.h"
-#include "compiler/TensorBuilders.h"
+#include "compiler/TensorRegistries.h"
#include <list>
namespace onert
@@ -51,10 +50,11 @@ public:
* @param graph Graph object
* @param tensor_builders Tensor builders that are currently used
*/
- ExecutorBase(std::unique_ptr<ir::LoweredGraph> &&lowered_graph,
+ ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph,
const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorBuilders &tensor_builders);
+ const compiler::TensorRegistries &tensor_regs,
+ backend::TensorManagerSet &&tensor_mgrs);
virtual ~ExecutorBase() = default;
@@ -102,7 +102,7 @@ protected:
protected:
ExecutionObservee _subject;
std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
- std::unique_ptr<ir::LoweredGraph> _lowered_graph;
+ std::unique_ptr<compiler::LoweredGraph> _lowered_graph;
const ir::Graph &_graph;
std::vector<std::shared_ptr<backend::ITensor>> _input_tensors;
std::vector<std::shared_ptr<backend::ITensor>> _output_tensors;
diff --git a/runtime/onert/core/src/exec/FunctionSequence.cc b/runtime/onert/core/src/exec/FunctionSequence.cc
index d413e8162..fb31f7582 100644
--- a/runtime/onert/core/src/exec/FunctionSequence.cc
+++ b/runtime/onert/core/src/exec/FunctionSequence.cc
@@ -28,7 +28,8 @@ namespace exec
void FunctionSequence::run()
{
- if (_enable_dynamic_shape_inferer)
+ // TODO Find out when `_enable_dynamic_shape_inferer` is true but `_dynamic_tensor_ctx` is false
+ if (_enable_dynamic_shape_inferer && _dynamic_tensor_ctx)
{
if (_dynamic_tensor_ctx->op_seq->size() != _functions.size())
throw std::runtime_error("operation and functions should be mapped one by one");
diff --git a/runtime/onert/core/src/exec/LinearExecutor.h b/runtime/onert/core/src/exec/LinearExecutor.h
index 5c099bc16..c224d3f4f 100644
--- a/runtime/onert/core/src/exec/LinearExecutor.h
+++ b/runtime/onert/core/src/exec/LinearExecutor.h
@@ -46,12 +46,14 @@ public:
* @param tensor_builders Tensor builders that are currently used
* @param code_map OpSequence and its code map
*/
- LinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+ LinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorBuilders &tensor_builders, compiler::CodeMap &&code_map,
+ const compiler::TensorRegistries &tensor_regs,
+ backend::TensorManagerSet &&tensor_mgrs, compiler::CodeMap &&code_map,
const std::vector<ir::OpSequenceIndex> &order)
- : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_builders}
+ : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
+ std::move(tensor_mgrs)}
{
for (auto index : order)
{
diff --git a/runtime/onert/core/src/exec/ParallelExecutor.cc b/runtime/onert/core/src/exec/ParallelExecutor.cc
index b5d81778f..ab234aacd 100644
--- a/runtime/onert/core/src/exec/ParallelExecutor.cc
+++ b/runtime/onert/core/src/exec/ParallelExecutor.cc
@@ -60,12 +60,13 @@ void ParallelExecutor::notify(uint32_t finished_job_id)
}
ParallelExecutor::ParallelExecutor(
- std::unique_ptr<ir::LoweredGraph> lowered_graph,
+ std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorBuilders &tensor_builders, compiler::CodeMap &&code_map)
- : DataflowExecutor{std::move(lowered_graph), input_tensors, output_tensors, tensor_builders,
- std::move(code_map)}
+ const compiler::TensorRegistries &tensor_regs, backend::TensorManagerSet &&tensor_mgrs,
+ compiler::CodeMap &&code_map)
+ : DataflowExecutor{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
+ std::move(tensor_mgrs), std::move(code_map)}
{
VERBOSE(ParallelExecutor) << "Constructing Parallel Executor" << std::endl;
}
diff --git a/runtime/onert/core/src/exec/ParallelExecutor.h b/runtime/onert/core/src/exec/ParallelExecutor.h
index 462cbc6a8..929edfce9 100644
--- a/runtime/onert/core/src/exec/ParallelExecutor.h
+++ b/runtime/onert/core/src/exec/ParallelExecutor.h
@@ -50,10 +50,11 @@ public:
* @param tensor_builders Tensor builders that are currently used
* @param code_map OpSequence and its code map
*/
- ParallelExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+ ParallelExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorBuilders &tensor_builders, compiler::CodeMap &&code_map);
+ const compiler::TensorRegistries &tensor_regs,
+ backend::TensorManagerSet &&tensor_mgrs, compiler::CodeMap &&code_map);
void executeImpl() override;
diff --git a/runtime/onert/core/src/exec/feature/nchw/Reader.h b/runtime/onert/core/src/exec/feature/nchw/Reader.h
index 48642d8ef..7be9df4d5 100644
--- a/runtime/onert/core/src/exec/feature/nchw/Reader.h
+++ b/runtime/onert/core/src/exec/feature/nchw/Reader.h
@@ -33,7 +33,7 @@ namespace feature
namespace nchw
{
-template <typename T> class Reader final : public feature::Reader<T>
+template <typename T> class Reader : public feature::Reader<T>
{
public:
// Construct for buffer of model inputs
@@ -68,15 +68,14 @@ public:
}
public:
- T at(uint32_t ch, uint32_t row, uint32_t col) const override
+ T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const final
{
- const auto offset = feature_index_to_byte_offset(0, ch, row, col);
-
- const T *ptr = reinterpret_cast<const T *>(_ptr + offset);
-
- return *ptr;
+ return getRef(batch, ch, row, col);
}
- T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const override
+ T at(uint32_t ch, uint32_t row, uint32_t col) const final { return getRef(0, ch, row, col); }
+
+protected:
+ const T &getRef(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const
{
const auto offset = feature_index_to_byte_offset(batch, ch, row, col);
diff --git a/runtime/onert/core/src/exec/feature/nchw/View.h b/runtime/onert/core/src/exec/feature/nchw/View.h
index ff55de199..dbaf1a91e 100644
--- a/runtime/onert/core/src/exec/feature/nchw/View.h
+++ b/runtime/onert/core/src/exec/feature/nchw/View.h
@@ -17,7 +17,7 @@
#ifndef __ONERT_EXEC_FEATURE_NCHW_VIEW_H__
#define __ONERT_EXEC_FEATURE_NCHW_VIEW_H__
-#include "../Reader.h"
+#include "Reader.h"
#include "backend/ITensor.h"
#include "ir/Shape.h"
@@ -34,99 +34,31 @@ namespace feature
namespace nchw
{
-template <typename T> class View final : public feature::Reader<T>
+template <typename T> class View final : public Reader<T>
{
public:
// Construct for buffer of model inputs
- View(const ir::FeatureShape &shape, T *ptr, size_t len)
- : _shape{shape}, _ptr{reinterpret_cast<uint8_t *>(ptr)}, _len{len}
+ View(const ir::FeatureShape &shape, T *ptr, size_t len) : Reader<T>{shape, ptr, len}
{
- assert(shape.N * shape.C * shape.H * shape.W * sizeof(T) == len);
-
- _strides.W = sizeof(T);
- _strides.H = shape.W * sizeof(T);
- _strides.C = shape.W * shape.H * sizeof(T);
- _strides.N = shape.W * shape.H * shape.C * sizeof(T);
+ // DO NOTHING
}
// Construct for backend tensor
- View(::onert::backend::ITensor *tensor)
- : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
- {
- assert(tensor->layout() == ir::Layout::NCHW);
-
- const auto start_offset = tensor->calcOffset({0, 0, 0, 0});
- _strides.W = tensor->dimension(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
- _strides.H = tensor->dimension(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
- _strides.C = tensor->dimension(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
- _strides.N = tensor->dimension(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;
-
- _shape.W = tensor->dimension(3);
- _shape.H = tensor->dimension(2);
- _shape.C = tensor->dimension(1);
- _shape.N = tensor->dimension(0);
- }
-
-public:
- T at(uint32_t ch, uint32_t row, uint32_t col) const override
+ View(::onert::backend::ITensor *tensor) : Reader<T>{tensor}
{
- const auto offset = feature_index_to_byte_offset(0, ch, row, col);
-
- T *ptr = reinterpret_cast<T *>(_ptr + offset);
-
- return *ptr;
- }
- T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const override
- {
- const auto offset = feature_index_to_byte_offset(batch, ch, row, col);
-
- T *ptr = reinterpret_cast<T *>(_ptr + offset);
-
- return *ptr;
+ // DO NOTHING
}
public:
- T &at(uint32_t ch, uint32_t row, uint32_t col)
- {
- const auto offset = feature_index_to_byte_offset(0, ch, row, col);
-
- T *ptr = reinterpret_cast<T *>(_ptr + offset);
-
- return *ptr;
- }
+ using Reader<T>::at;
T &at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col)
{
- const auto offset = feature_index_to_byte_offset(batch, ch, row, col);
-
- T *ptr = reinterpret_cast<T *>(_ptr + offset);
-
- return *ptr;
+ return const_cast<T &>(Reader<T>::getRef(batch, ch, row, col));
}
-
-private:
- size_t feature_index_to_byte_offset(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const
+ T &at(uint32_t ch, uint32_t row, uint32_t col)
{
- assert(1u * _shape.N > batch); // shape.N > batch
- assert(1u * _shape.C > ch); // shape.C > ch
- assert(1u * _shape.H > row); // shape.H > row
- assert(1u * _shape.W > col); // shape.W > col
-
- uint32_t res = 0;
- res += batch * _strides.N;
- res += ch * _strides.C;
- res += row * _strides.H;
- res += col * _strides.W;
-
- return res;
+ return const_cast<T &>(Reader<T>::getRef(0, ch, row, col));
}
-
-private:
- // TODO Remove _shape
- ir::FeatureShape _shape;
- using Strides = ir::FeatureShape;
- Strides _strides;
- uint8_t *_ptr;
- size_t _len;
};
} // namespace nchw
diff --git a/runtime/onert/core/src/exec/feature/nhwc/Reader.h b/runtime/onert/core/src/exec/feature/nhwc/Reader.h
index ef27992c3..7730cee72 100644
--- a/runtime/onert/core/src/exec/feature/nhwc/Reader.h
+++ b/runtime/onert/core/src/exec/feature/nhwc/Reader.h
@@ -34,7 +34,7 @@ namespace feature
namespace nhwc
{
-template <typename T> class Reader final : public feature::Reader<T>
+template <typename T> class Reader : public feature::Reader<T>
{
public:
// Construct for buffer of model inputs
@@ -70,15 +70,14 @@ public:
}
public:
- T at(uint32_t row, uint32_t col, uint32_t ch) const override
+ T at(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const final
{
- const auto offset = feature_index_to_byte_offset(0, row, col, ch);
-
- const T *ptr = reinterpret_cast<const T *>(_ptr + offset);
-
- return *ptr;
+ return getRef(batch, row, col, ch);
}
- T at(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const override
+ T at(uint32_t row, uint32_t col, uint32_t ch) const final { return getRef(0, row, col, ch); }
+
+protected:
+ const T &getRef(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const
{
const auto offset = feature_index_to_byte_offset(batch, row, col, ch);
diff --git a/runtime/onert/core/src/exec/feature/nhwc/View.h b/runtime/onert/core/src/exec/feature/nhwc/View.h
index a09961a84..72c8c3415 100644
--- a/runtime/onert/core/src/exec/feature/nhwc/View.h
+++ b/runtime/onert/core/src/exec/feature/nhwc/View.h
@@ -35,101 +35,31 @@ namespace feature
namespace nhwc
{
-template <typename T> class View final : public feature::Reader<T>
+template <typename T> class View final : public Reader<T>
{
public:
// Construct for buffer of model inputs
- View(const ir::FeatureShape &shape, T *ptr, size_t len)
- : _shape{shape}, _ptr{reinterpret_cast<uint8_t *>(ptr)}, _len{len}
+ View(const ir::FeatureShape &shape, T *ptr, size_t len) : Reader<T>{shape, ptr, len}
{
- UNUSED_RELEASE(len); // Workaround for unused variable in release mode
- assert(shape.N * shape.H * shape.W * shape.C * sizeof(T) == len);
-
- // No padding
- _strides.C = sizeof(T);
- _strides.W = shape.C * sizeof(T);
- _strides.H = shape.C * shape.W * sizeof(T);
- _strides.N = shape.C * shape.W * shape.H * sizeof(T);
+ // DO NOTHING
}
// Construct for backend tensor
- View(backend::ITensor *tensor)
- : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
+ View(backend::ITensor *tensor) : Reader<T>{tensor}
{
- assert(tensor->layout() == ir::Layout::NHWC);
-
- const auto start_offset = tensor->calcOffset({0, 0, 0, 0});
- _strides.C = tensor->dimension(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
- _strides.W = tensor->dimension(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
- _strides.H = tensor->dimension(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
- _strides.N = tensor->dimension(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;
-
- _shape.C = tensor->dimension(3);
- _shape.W = tensor->dimension(2);
- _shape.H = tensor->dimension(1);
- _shape.N = tensor->dimension(0);
+ // DO NOTHING
}
public:
- T at(uint32_t row, uint32_t col, uint32_t ch) const override
- {
- const auto offset = feature_index_to_byte_offset(0, row, col, ch);
-
- const T *ptr = reinterpret_cast<const T *>(_ptr + offset);
-
- return *ptr;
- }
- T at(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const override
- {
- const auto offset = feature_index_to_byte_offset(batch, row, col, ch);
-
- const T *ptr = reinterpret_cast<const T *>(_ptr + offset);
-
- return *ptr;
- }
-
- T &at(uint32_t row, uint32_t col, uint32_t ch)
- {
- const auto offset = feature_index_to_byte_offset(0, row, col, ch);
-
- T *ptr = reinterpret_cast<T *>(_ptr + offset);
-
- return *ptr;
- }
-
+ using Reader<T>::at;
T &at(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch)
{
- const auto offset = feature_index_to_byte_offset(batch, row, col, ch);
-
- T *ptr = reinterpret_cast<T *>(_ptr + offset);
-
- return *ptr;
+ return const_cast<T &>(Reader<T>::getRef(batch, row, col, ch));
}
-
-private:
- size_t feature_index_to_byte_offset(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const
+ T &at(uint32_t row, uint32_t col, uint32_t ch)
{
- assert(1u * _shape.N > batch); // shape.N > batch
- assert(1u * _shape.H > row); // shape.H > row
- assert(1u * _shape.W > col); // shape.W > col
- assert(1u * _shape.C > ch); // shape.C > ch
-
- uint32_t res = 0;
- res += batch * _strides.N;
- res += row * _strides.H;
- res += col * _strides.W;
- res += ch * _strides.C;
-
- return res;
+ return const_cast<T &>(Reader<T>::getRef(0, row, col, ch));
}
-
-private:
- // TODO Remove _shape
- ir::FeatureShape _shape;
- using Strides = ir::FeatureShape;
- Strides _strides;
- uint8_t *_ptr;
- size_t _len;
};
} // namespace nhwc
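
The nchw and nhwc hunks above apply the same refactoring: the strided element lookup now lives once in Reader<T> as a protected getRef(), the final at() overloads forward to it, and View<T> inherits Reader<T>, re-exposes the const accessors with a using-declaration, and adds mutable at() overloads by const_cast-ing the shared getRef() result. A minimal self-contained sketch of that pattern, with a flat 1-D buffer standing in for the real NCHW/NHWC stride math (these are not the onert classes):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Read-only accessor: owns the element-lookup logic once.
template <typename T> class Reader
{
public:
  Reader(const T *ptr, std::size_t len) : _ptr{ptr}, _len{len} {}

  T at(uint32_t i) const { return getRef(i); } // value access for readers

protected:
  // Single source of truth for element lookup; derived classes reuse it.
  const T &getRef(uint32_t i) const
  {
    assert(i < _len);
    return _ptr[i];
  }

private:
  const T *_ptr;
  std::size_t _len;
};

// Mutable view: inherits the read path, adds write access via const_cast.
// Safe here because the View is always constructed from a non-const buffer.
template <typename T> class View final : public Reader<T>
{
public:
  View(T *ptr, std::size_t len) : Reader<T>{ptr, len} {}

  using Reader<T>::at; // keep the const overloads visible

  T &at(uint32_t i) { return const_cast<T &>(Reader<T>::getRef(i)); }
};

int main()
{
  std::vector<float> buf(4, 0.f);
  View<float> view{buf.data(), buf.size()};
  view.at(2) = 3.f; // write through the mutable overload
  return view.at(2) == 3.f ? 0 : 1;
}
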
diff --git a/runtime/onert/core/src/interp/InterpOps.lst b/runtime/onert/core/src/interp/InterpOps.lst
index 5f646b83f..0714df38a 100644
--- a/runtime/onert/core/src/interp/InterpOps.lst
+++ b/runtime/onert/core/src/interp/InterpOps.lst
@@ -22,43 +22,32 @@
//
// Same list with Operations.lst
// Make comment out if operation is not supported in interpreter
-INTERP_OP(Add)
-INTERP_OP(Sub)
+INTERP_OP(BinaryArithmetic)
//INTERP_OP(BatchToSpaceND)
//INTERP_OP(Cast)
INTERP_OP(Conv2D)
INTERP_OP(DepthwiseConv2D)
-INTERP_OP(AvgPool2D)
-INTERP_OP(MaxPool2D)
+INTERP_OP(Pool2D)
INTERP_OP(Concat)
INTERP_OP(FullyConnected)
//INTERP_OP(Reduce)
INTERP_OP(Reshape)
-INTERP_OP(Mul)
INTERP_OP(Softmax)
//INTERP_OP(Squeeze)
//INTERP_OP(Slice)
//INTERP_OP(StridedSlice)
-INTERP_OP(Tanh)
-INTERP_OP(Logistic)
-//INTERP_OP(Div)
+INTERP_OP(ElementwiseActivation)
//INTERP_OP(Transpose)
//INTERP_OP(Exp)
//INTERP_OP(Comparison)
-//INTERP_OP(LogicalAnd)
-//INTERP_OP(LogicalOr)
//INTERP_OP(LogicalNot)
//INTERP_OP(LSTM)
//INTERP_OP(RSQRT)
-INTERP_OP(ReLU)
//INTERP_OP(ResizeBilinear)
-INTERP_OP(ReLU1)
-INTERP_OP(ReLU6)
//INTERP_OP(RNN)
//INTERP_OP(Floor)
//INTERP_OP(SpaceToBatchND)
//INTERP_OP(SpaceToDepth)
-//INTERP_OP(L2Pool2D)
//INTERP_OP(EmbeddingLookup)
//INTERP_OP(L2Normalization)
//INTERP_OP(HashtableLookup)
@@ -81,6 +70,4 @@ INTERP_OP(Gather)
INTERP_OP(Pad)
//INTERP_OP(Custom)
//INTERP_OP(Permute)
-//INTERP_OP(Min)
-//INTERP_OP(Max)
//INTERP_OP(OneHot)
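
InterpOps.lst is an X-macro list: every INTERP_OP(Name) entry expands according to whatever definition of INTERP_OP the including source provides, so collapsing Add/Sub/Mul into one BinaryArithmetic entry (and AvgPool2D/MaxPool2D into Pool2D) updates every expansion site at once. A small sketch of the include-list technique, using a stand-in macro list and hypothetical getter names rather than the real registration code:

#include <cstdio>

// Stand-in for InterpOps.lst: one entry per supported operation.
#define INTERP_OP_LIST(OP) \
  OP(BinaryArithmetic)     \
  OP(Conv2D)               \
  OP(Pool2D)

// Expansion 1: declare a kernel getter per operation (hypothetical names).
#define DECLARE_GETTER(Name) void get##Name();
INTERP_OP_LIST(DECLARE_GETTER)
#undef DECLARE_GETTER

// Expansion 2: print the supported operation names.
#define PRINT_NAME(Name) std::printf("%s\n", #Name);
int main()
{
  INTERP_OP_LIST(PRINT_NAME)
  return 0;
}
#undef PRINT_NAME
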
diff --git a/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc b/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc
index 44c955421..86e883524 100644
--- a/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc
+++ b/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc
@@ -19,9 +19,7 @@
#include "OperationUtil.h"
#include "interp/Registration.h"
-#include "ir/operation/Add.h"
-#include "ir/operation/Sub.h"
-#include "ir/operation/Mul.h"
+#include "ir/operation/BinaryArithmetic.h"
#include "misc/polymorphic_downcast.h"
#include "cker/Types.h"
@@ -39,12 +37,13 @@ enum class OpType
MUL
};
-template <typename node_type> void prepareAdd(ExecEnv *env, const ir::Operation &node)
+void prepare(ExecEnv *env, const ir::Operation &node)
{
- const auto &add_node = nnfw::misc::polymorphic_downcast<const node_type &>(node);
+ const auto &arithmetic_node =
+ nnfw::misc::polymorphic_downcast<const ir::operation::BinaryArithmetic &>(node);
- const auto lhs_index = node.getInputs().at(add_node.LHS);
- const auto rhs_index = node.getInputs().at(add_node.RHS);
+ const auto lhs_index = node.getInputs().at(arithmetic_node.LHS);
+ const auto rhs_index = node.getInputs().at(arithmetic_node.RHS);
const auto out_index = node.getOutputs().at(0);
const auto lhs_tensor = env->tensorAt(lhs_index);
@@ -54,7 +53,7 @@ template <typename node_type> void prepareAdd(ExecEnv *env, const ir::Operation
// TODO Util function to compare TensorInfo
if (lhs_tensor->data_type() != rhs_tensor->data_type())
{
- throw std::runtime_error{"Interp(Add): Different input types"};
+ throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Different input types"};
}
bool try_broadcast = (lhs_tensor->tensorInfo().shape() != rhs_tensor->tensorInfo().shape());
@@ -65,7 +64,7 @@ template <typename node_type> void prepareAdd(ExecEnv *env, const ir::Operation
rhs_tensor->tensorInfo().shape(), success);
if (!success)
{
- throw std::runtime_error{"Interp(Add): Fail to brodcasting"};
+ throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Fail to brodcasting"};
}
auto output_info =
@@ -86,7 +85,7 @@ template <typename node_type> void prepareAdd(ExecEnv *env, const ir::Operation
// TODO Util function to compare TensorInfo
if (lhs_tensor->data_type() != out_tensor->data_type())
{
- throw std::runtime_error{"Interp(Add): Invalid output type"};
+ throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Invalid output type"};
}
}
@@ -103,9 +102,9 @@ inline void setActivationParams(int32_t min, int32_t max,
params->quantized_activation_max = max;
}
-template <typename raw_type, typename param_type, OpType op_type>
+template <typename raw_type, OpType op_type>
void invoke(const ITensor *lhs_tensor, const ITensor *rhs_tensor, const ITensor *out_tensor,
- const param_type &param)
+ const ir::operation::BinaryArithmetic::Param &param)
{
const auto lhs_buffer = lhs_tensor->bufferRO();
const auto rhs_buffer = rhs_tensor->bufferRO();
@@ -146,13 +145,11 @@ void invoke(const ITensor *lhs_tensor, const ITensor *rhs_tensor, const ITensor
out_shape, out_ptr);
}
-template <typename node_type, typename param_type, OpType op_type>
-void invokeAdd(const ExecEnv *env, const ir::Operation &node)
+template <OpType op_type>
+void invokeBinaryArithmetic(const ExecEnv *env, const ir::operation::BinaryArithmetic &node)
{
- const auto &arithmetic_node = nnfw::misc::polymorphic_downcast<const node_type &>(node);
-
- const auto lhs_index = node.getInputs().at(arithmetic_node.LHS);
- const auto rhs_index = node.getInputs().at(arithmetic_node.RHS);
+ const auto lhs_index = node.getInputs().at(node.LHS);
+ const auto rhs_index = node.getInputs().at(node.RHS);
const auto out_index = node.getOutputs().at(0);
const auto lhs_tensor = env->tensorAt(lhs_index);
const auto rhs_tensor = env->tensorAt(rhs_index);
@@ -161,38 +158,46 @@ void invokeAdd(const ExecEnv *env, const ir::Operation &node)
if (data_type == ir::DataType::INT32)
{
- invoke<int32_t, param_type, op_type>(lhs_tensor, rhs_tensor, out_tensor,
- arithmetic_node.param());
+ invoke<int32_t, op_type>(lhs_tensor, rhs_tensor, out_tensor, node.param());
}
else if (data_type == ir::DataType::FLOAT32)
{
- invoke<float, param_type, op_type>(lhs_tensor, rhs_tensor, out_tensor, arithmetic_node.param());
+ invoke<float, op_type>(lhs_tensor, rhs_tensor, out_tensor, node.param());
}
else
{
throw std::runtime_error{"NYI: Unsupported data type"};
}
}
-} // namespace
-OpKernel *getAdd()
+void invokeBinaryArithmeticOps(const ExecEnv *env, const ir::Operation &node)
{
- static OpKernel kernel = {prepareAdd<ir::operation::Add>,
- invokeAdd<ir::operation::Add, ir::operation::Add::Param, OpType::ADD>};
- return &kernel;
-}
+ const auto &arithmetic_node =
+ nnfw::misc::polymorphic_downcast<const ir::operation::BinaryArithmetic &>(node);
-OpKernel *getSub()
-{
- static OpKernel kernel = {prepareAdd<ir::operation::Sub>,
- invokeAdd<ir::operation::Sub, ir::operation::Sub::Param, OpType::SUB>};
- return &kernel;
+ switch (arithmetic_node.param().arithmetic_type)
+ {
+ case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
+ invokeBinaryArithmetic<OpType::ADD>(env, arithmetic_node);
+ break;
+ case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
+ invokeBinaryArithmetic<OpType::SUB>(env, arithmetic_node);
+ break;
+ case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
+ invokeBinaryArithmetic<OpType::MUL>(env, arithmetic_node);
+ break;
+ default:
+ throw std::runtime_error{"Interp(BinaryArithmetic): NYI unsupported operation " +
+ arithmetic_node.name()};
+ break;
+ }
}
-OpKernel *getMul()
+} // namespace
+
+OpKernel *getBinaryArithmetic()
{
- static OpKernel kernel = {prepareAdd<ir::operation::Mul>,
- invokeAdd<ir::operation::Mul, ir::operation::Mul::Param, OpType::MUL>};
+ static OpKernel kernel = {prepare, invokeBinaryArithmeticOps};
return &kernel;
}
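
The rewritten BinaryArithmeticOps.cc keeps one prepare/invoke pair and dispatches at runtime on param().arithmetic_type to a compile-time OpType instantiation of the shared invoke template. A stripped-down sketch of that dispatch shape, using plain scalars instead of the ExecEnv/ITensor machinery:

#include <cstdio>
#include <stdexcept>

enum class OpType { ADD, SUB, MUL };

// One templated kernel body; the arithmetic op is a compile-time parameter.
template <typename T, OpType op> T invoke(T lhs, T rhs)
{
  switch (op)
  {
    case OpType::ADD: return lhs + rhs;
    case OpType::SUB: return lhs - rhs;
    case OpType::MUL: return lhs * rhs;
  }
  return T{};
}

// Runtime parameter -> compile-time instantiation, mirroring invokeBinaryArithmeticOps.
float dispatch(OpType type, float lhs, float rhs)
{
  switch (type)
  {
    case OpType::ADD: return invoke<float, OpType::ADD>(lhs, rhs);
    case OpType::SUB: return invoke<float, OpType::SUB>(lhs, rhs);
    case OpType::MUL: return invoke<float, OpType::MUL>(lhs, rhs);
    default: throw std::runtime_error{"NYI unsupported operation"};
  }
}

int main()
{
  std::printf("%.1f\n", dispatch(OpType::MUL, 3.f, 4.f)); // prints 12.0
  return 0;
}
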
diff --git a/runtime/onert/core/src/interp/operations/UnaryActivations.cc b/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc
index ea5e2417b..c8773bef4 100644
--- a/runtime/onert/core/src/interp/operations/UnaryActivations.cc
+++ b/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc
@@ -20,10 +20,11 @@
#include "interp/Registration.h"
-#include "ir/operation/ReLU.h"
-#include "ir/operation/ReLU1.h"
-#include "ir/operation/ReLU6.h"
-#include "ir/operation/Tanh.h"
+#include "ir/operation/ElementwiseActivation.h"
+
+#include <misc/polymorphic_downcast.h>
+#include <cker/operation/Logistic.h>
+#include <cker/operation/Tanh.h>
namespace onert
{
@@ -34,9 +35,8 @@ namespace
enum class ActivationType
{
+ Logistic,
ReLU,
- ReLU1,
- ReLU6,
Tanh
};
@@ -65,30 +65,25 @@ void prepare(ExecEnv *env, const ir::Operation &node)
// TODO Util function to compare TensorInfo
if (input_tensor->data_type() != output_tensor->data_type())
{
- throw std::runtime_error{"Interp(Activations): Invalid output type"};
+ throw std::runtime_error{"Interp(ElementwiseActivation): Invalid output type"};
}
}
template <ActivationType act_type>
-void evalFloat(const float *input_ptr, float *output_ptr, uint64_t num_elements)
+void evalFloat(const float *input_ptr, float *output_ptr, uint64_t num_elements, float alpha,
+ float beta)
{
std::function<float(const float &)> fn = [](const float &) { return std::nanf(""); };
switch (act_type)
{
case ActivationType::ReLU:
- fn = [](const float &in) { return std::max(0.f, in); };
- break;
- case ActivationType::ReLU1:
- fn = [](const float &in) { return std::min(std::max(-1.f, in), 1.f); };
- break;
- case ActivationType::ReLU6:
- fn = [](const float &in) { return std::min(std::max(0.f, in), 6.f); };
+ fn = [alpha, beta](const float &in) { return std::min(std::max(beta, in), alpha); };
break;
case ActivationType::Tanh:
fn = [](const float &in) { return std::tanh(in); };
break;
default:
- throw std::runtime_error{"Interp(Activations): NYI - Unsupported activation"};
+ throw std::runtime_error{"Interp(ElementwiseActivation): NYI - Unsupported activation"};
break;
}
@@ -114,38 +109,51 @@ template <ActivationType act_type> void invoke(const ExecEnv *env, const ir::Ope
uint64_t elements = input_tensor->num_elements();
const float *input_start = reinterpret_cast<const float *>(input_tensor->bufferRO());
float *out = reinterpret_cast<float *>(output_tensor->buffer());
-
- evalFloat<act_type>(input_start, out, elements);
+ if (act_type == ActivationType::Logistic)
+ {
+ const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape());
+ const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
+ nnfw::cker::Logistic(cker_input_shape, input_start, cker_output_shape, out);
+ }
+ else
+ {
+ const auto &act_node =
+ nnfw::misc::polymorphic_downcast<const ir::operation::ElementwiseActivation &>(node);
+ evalFloat<act_type>(input_start, out, elements, act_node.param().alpha,
+ act_node.param().beta);
+ }
}
else
{
- throw std::runtime_error{"Interp(ReLU6): NYI - Support float only"};
+ throw std::runtime_error{"Interp(" + node.name() + "): NYI - Support float only"};
}
}
-} // namespace
-
-OpKernel *getReLU()
+void invokeElementwiseActivation(const ExecEnv *env, const ir::Operation &node)
{
- static OpKernel kernel = {prepare, invoke<ActivationType::ReLU>};
- return &kernel;
-}
-
-OpKernel *getReLU1()
-{
- static OpKernel kernel = {prepare, invoke<ActivationType::ReLU1>};
- return &kernel;
+ const auto &act_node =
+ nnfw::misc::polymorphic_downcast<const ir::operation::ElementwiseActivation &>(node);
+ switch (act_node.param().op_type)
+ {
+ case ir::operation::ElementwiseActivation::Type::LOGISTIC:
+ invoke<ActivationType::Logistic>(env, node);
+ break;
+ case ir::operation::ElementwiseActivation::Type::RELU:
+ invoke<ActivationType::ReLU>(env, node);
+ break;
+ case ir::operation::ElementwiseActivation::Type::TANH:
+ invoke<ActivationType::Tanh>(env, node);
+ break;
+ default:
+ throw std::runtime_error("Interp(" + node.name() + "): NYI - Unsupported activation");
+ }
}
-OpKernel *getReLU6()
-{
- static OpKernel kernel = {prepare, invoke<ActivationType::ReLU6>};
- return &kernel;
-}
+} // namespace
-OpKernel *getTanh()
+OpKernel *getElementwiseActivation()
{
- static OpKernel kernel = {prepare, invoke<ActivationType::Tanh>};
+ static OpKernel kernel = {prepare, invokeElementwiseActivation};
return &kernel;
}
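
The consolidated ElementwiseActivation kernel folds the ReLU family into one clamp parameterized by alpha (upper bound) and beta (lower bound): fn = min(max(beta, in), alpha). Read that way, plain ReLU corresponds to (alpha = +inf, beta = 0), ReLU6 to (6, 0), and ReLU1 to (1, -1), though the exact values the frontend passes are not visible in this diff. A short sketch of the clamp:

#include <algorithm>
#include <cstdio>
#include <limits>

// Generic clamp used for the whole ReLU family: out = min(max(beta, in), alpha).
float relu_family(float in, float alpha, float beta) { return std::min(std::max(beta, in), alpha); }

int main()
{
  const float inf = std::numeric_limits<float>::infinity();
  std::printf("ReLU(-2)  = %.1f\n", relu_family(-2.f, inf, 0.f));  // 0.0
  std::printf("ReLU6(10) = %.1f\n", relu_family(10.f, 6.f, 0.f));  // 6.0
  std::printf("ReLU1(-3) = %.1f\n", relu_family(-3.f, 1.f, -1.f)); // -1.0
  return 0;
}
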
diff --git a/runtime/onert/core/src/interp/operations/Logistic.cc b/runtime/onert/core/src/interp/operations/Logistic.cc
deleted file mode 100644
index c23cbb782..000000000
--- a/runtime/onert/core/src/interp/operations/Logistic.cc
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/Logistic.h>
-
-#include "OperationUtil.h"
-
-#include "interp/Registration.h"
-#include "ir/operation/Logistic.h"
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-void prepareLogistic(ExecEnv *env, const ir::Operation &node)
-{
- const auto input_index = node.getInputs().at(0);
- const auto output_index = node.getOutputs().at(0);
-
- const auto input_tensor = env->tensorAt(input_index);
-
- const auto output_info = env->graph().operands().at(output_index).info();
-
- // Check shape and type lhs is same with rhs
- // TODO Util function to compare TensorInfo
- if (output_info.total_size() == 0)
- {
- throw std::runtime_error{"Interp(TConv): NYI unspecified output shape"};
- }
- else
- {
- env->allocateIfNeeded(output_index, output_info);
- }
-
- const auto output_tensor = env->tensorAt(output_index);
- if (input_tensor->data_type() != output_tensor->data_type())
- {
- throw std::runtime_error{"Interp(Logistic): Invalid output type"};
- }
-}
-
-void invoke(const ITensor *input_tensor, const ITensor *output_tensor)
-{
- const auto input_buffer = input_tensor->bufferRO();
- auto output_buffer = output_tensor->buffer();
-
- const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape());
- const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
- const float *input_ptr = reinterpret_cast<const float *>(input_buffer);
- float *output_ptr = reinterpret_cast<float *>(output_buffer);
-
- nnfw::cker::Logistic(cker_input_shape, input_ptr, cker_output_shape, output_ptr);
-}
-
-void invokeLogistic(const ExecEnv *env, const ir::Operation &node)
-{
- const auto input_index = node.getInputs().at(0);
- const auto output_index = node.getOutputs().at(0);
-
- const auto input_tensor = env->tensorAt(input_index);
- const auto output_tensor = env->tensorAt(output_index);
-
- const auto data_type = input_tensor->data_type();
-
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(input_tensor, output_tensor);
- }
- else
- {
- throw std::runtime_error{"Interp(Logistic): NYI - Unsupported data type"};
- }
-}
-} // namespace
-
-OpKernel *getLogistic()
-{
- static OpKernel kernel = {prepareLogistic, invokeLogistic};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/MaxPool2D.cc b/runtime/onert/core/src/interp/operations/MaxPool2D.cc
deleted file mode 100644
index 313948fb6..000000000
--- a/runtime/onert/core/src/interp/operations/MaxPool2D.cc
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/MaxPool.h>
-
-#include "OperationUtil.h"
-
-#include "interp/Registration.h"
-#include "ir/operation/MaxPool2D.h"
-#include "util/Utils.h"
-#include "util/ShapeInference.h"
-#include "misc/polymorphic_downcast.h"
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-void prepareMaxPool2D(ExecEnv *env, const ir::Operation &node)
-{
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
-
- assert(in_tensor->num_dimensions() == 4);
- UNUSED_RELEASE(in_tensor);
-
- const auto output_info = env->graph().operands().at(out_index).info();
- if (output_info.total_size() == 0)
- {
- // Handle unspecified output shape
- const auto &maxpool_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::MaxPool2D &>(node);
- const auto infered_output_shape =
- shape_inference::inferMaxPoolShape(in_tensor->tensorInfo().shape(), maxpool_node.param());
- env->allocateIfNeeded(
- out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo()));
- }
- else
- {
- env->allocateIfNeeded(out_index, output_info);
- }
-
- auto out_tensor = env->tensorAt(out_index);
- UNUSED_RELEASE(out_tensor);
-
- // Handle same ifm & ofm data type only
- assert(in_tensor->data_type() == out_tensor->data_type());
- assert(out_tensor->num_dimensions() == 4);
-}
-
-void invoke(const ITensor *in_tensor, const ITensor *out_tensor,
- const ir::operation::MaxPool2D::Param &param)
-{
- // TODO support NCHW frontend
- const auto ifm_shape = in_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- const auto ofm_shape = out_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- const auto padding =
- ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, param.kw, param.kh);
- // Calculate
- nnfw::cker::PoolParams cker_param;
- calculateActivationRange(param.activation, &cker_param.float_activation_min,
- &cker_param.float_activation_max);
- cker_param.filter_width = param.kw;
- cker_param.filter_height = param.kh;
- cker_param.padding_values.width = padding.left;
- cker_param.padding_values.height = padding.top;
- cker_param.stride_width = param.stride.horizontal;
- cker_param.stride_height = param.stride.vertical;
-
- const auto in_shape = convertShape(in_tensor->tensorInfo().shape());
- const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
- const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO());
- float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer());
-
- nnfw::cker::MaxPool(cker_param, in_shape, in_ptr, out_shape, out_ptr);
-}
-
-void invokeMaxPool2D(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &maxpool_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::MaxPool2D &>(node);
-
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
- const auto out_tensor = env->tensorAt(out_index);
-
- const auto data_type = in_tensor->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(in_tensor, out_tensor, maxpool_node.param());
- }
- else
- {
- throw std::runtime_error{"NYI: Support float32 only"};
- }
-}
-} // namespace
-
-OpKernel *getMaxPool2D()
-{
- static OpKernel kernel = {prepareMaxPool2D, invokeMaxPool2D};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/AvgPool2D.cc b/runtime/onert/core/src/interp/operations/Pool2D.cc
index 42fe42301..92f9d70b2 100644
--- a/runtime/onert/core/src/interp/operations/AvgPool2D.cc
+++ b/runtime/onert/core/src/interp/operations/Pool2D.cc
@@ -15,11 +15,12 @@
*/
#include <cker/operation/AveragePool.h>
+#include <cker/operation/MaxPool.h>
#include "OperationUtil.h"
#include "interp/Registration.h"
-#include "ir/operation/AvgPool2D.h"
+#include "ir/operation/Pool2D.h"
#include "util/Utils.h"
#include "util/ShapeInference.h"
#include "misc/polymorphic_downcast.h"
@@ -28,12 +29,13 @@ namespace onert
{
namespace interp
{
-namespace avgpool2d
+namespace pool2d
{
-void prepareAvgPool2D(ExecEnv *env, const ir::Operation &node)
+void preparePool2D(ExecEnv *env, const ir::Operation &node)
{
- const auto in_index = node.getInputs().at(0);
+ const auto &pool_node = nnfw::misc::polymorphic_downcast<const ir::operation::Pool2D &>(node);
+ const auto in_index = node.getInputs().at(pool_node.INPUT);
const auto out_index = node.getOutputs().at(0);
const auto in_tensor = env->tensorAt(in_index);
@@ -45,10 +47,8 @@ void prepareAvgPool2D(ExecEnv *env, const ir::Operation &node)
if (output_info.total_size() == 0)
{
// Handle unspecified output shape
- const auto &avgpool_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::AvgPool2D &>(node);
const auto infered_output_shape =
- shape_inference::inferAvgPoolShape(in_tensor->tensorInfo().shape(), avgpool_node.param());
+ shape_inference::inferPoolShape(in_tensor->tensorInfo().shape(), pool_node.param());
env->allocateIfNeeded(
out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo()));
}
@@ -65,18 +65,44 @@ void prepareAvgPool2D(ExecEnv *env, const ir::Operation &node)
assert(out_tensor->num_dimensions() == 4);
}
-void invoke(const ITensor *in_tensor, const ITensor *out_tensor,
- const ir::operation::AvgPool2D::Param &param)
+template <typename T>
+void invoke(const nnfw::cker::PoolParams &params, const nnfw::cker::Shape &in_shape,
+ const T *in_ptr, const nnfw::cker::Shape &out_shape, T *out_ptr,
+ ir::operation::Pool2D::PoolType op_type)
{
- // TODO Support NCHW frontend
+ switch (op_type)
+ {
+ case ir::operation::Pool2D::PoolType::AVG:
+ nnfw::cker::AveragePool<T>(params, in_shape, in_ptr, out_shape, out_ptr);
+ break;
+ case ir::operation::Pool2D::PoolType::MAX:
+ nnfw::cker::MaxPool<T>(params, in_shape, in_ptr, out_shape, out_ptr);
+ break;
+ default:
+ throw std::runtime_error{"Interp(Pool2D): NYI unsupported operation"};
+ break;
+ }
+}
+
+void invokePool2DOps(const ExecEnv *env, const ir::Operation &node)
+{
+ const auto &pool_node = nnfw::misc::polymorphic_downcast<const ir::operation::Pool2D &>(node);
+
+ const auto in_index = node.getInputs().at(0);
+ const auto out_index = node.getOutputs().at(0);
+
+ // Check lhs shape is same with rhs (with broadcast)
+ const auto in_tensor = env->tensorAt(in_index);
+ const auto out_tensor = env->tensorAt(out_index);
+
+ // TODO support NCHW frontend
const auto ifm_shape = in_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
const auto ofm_shape = out_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
+ const auto param = pool_node.param();
const auto padding =
ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, param.kw, param.kh);
// Calculate
nnfw::cker::PoolParams cker_param;
- calculateActivationRange(param.activation, &cker_param.float_activation_min,
- &cker_param.float_activation_max);
cker_param.filter_width = param.kw;
cker_param.filter_height = param.kh;
cker_param.padding_values.width = padding.left;
@@ -84,41 +110,29 @@ void invoke(const ITensor *in_tensor, const ITensor *out_tensor,
cker_param.stride_width = param.stride.horizontal;
cker_param.stride_height = param.stride.vertical;
- const auto in_shape = convertShape(in_tensor->tensorInfo().shape());
- const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
- const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO());
- float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer());
-
- nnfw::cker::AveragePool(cker_param, in_shape, in_ptr, out_shape, out_ptr);
-}
-
-void invokeAvgPool2D(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &avgpool_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::AvgPool2D &>(node);
-
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- // Check lhs shape is same with rhs (with broadcast)
- const auto in_tensor = env->tensorAt(in_index);
- const auto out_tensor = env->tensorAt(out_index);
-
const auto data_type = in_tensor->data_type();
if (data_type == ir::DataType::FLOAT32)
{
- invoke(in_tensor, out_tensor, avgpool_node.param());
+ calculateActivationRange(param.activation, &cker_param.float_activation_min,
+ &cker_param.float_activation_max);
+
+ const auto in_shape = convertShape(in_tensor->tensorInfo().shape());
+ const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
+ const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO());
+ float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer());
+ // Now, invoke() supports only Pool2D in float
+ invoke<float>(cker_param, in_shape, in_ptr, out_shape, out_ptr, param.op_type);
}
else
{
throw std::runtime_error{"NYI: Support float only"};
}
}
-} // namespace avgpool2d
+} // namespace pool2d
-OpKernel *getAvgPool2D()
+OpKernel *getPool2D()
{
- static OpKernel kernel = {avgpool2d::prepareAvgPool2D, avgpool2d::invokeAvgPool2D};
+ static OpKernel kernel = {pool2d::preparePool2D, pool2d::invokePool2DOps};
return &kernel;
}
diff --git a/runtime/onert/core/src/interp/operations/Softmax.cc b/runtime/onert/core/src/interp/operations/Softmax.cc
index 6d9359e1e..d30f78deb 100644
--- a/runtime/onert/core/src/interp/operations/Softmax.cc
+++ b/runtime/onert/core/src/interp/operations/Softmax.cc
@@ -29,43 +29,6 @@ namespace interp
namespace
{
-void Softmax2D(const float *in, const int input_size, const int batch_size, const float beta,
- float *out)
-{
- assert(input_size > 0);
-
- // For each batch
- for (int b = 0; b < batch_size; b++)
- {
- // Find the max coeff.
- float max_coeff = in[0];
- for (int i = 1; i < input_size; i++)
- {
- if (in[i] > max_coeff)
- max_coeff = in[i];
- }
-
- // Compute the normalized sum of exps.
- float exp_sum = 0.0;
- for (int i = 0; i < input_size; i++)
- {
- out[i] = std::exp((in[i] - max_coeff) * beta);
- exp_sum += out[i];
- }
-
- // Divide by the sum of exps.
- float reciprocal_sum_exp = 1.f / exp_sum;
- for (int i = 0; i < input_size; i++)
- {
- out[i] *= reciprocal_sum_exp;
- }
-
- // Advance in and out pointers for the next batch.
- in += input_size;
- out += input_size;
- }
-}
-
void prepareSoftMax(ExecEnv *env, const ir::Operation &node)
{
const auto in_index = node.getInputs().at(0);
@@ -108,7 +71,7 @@ void invoke(const ITensor *in_tensor, const ITensor *out_tensor,
uint32_t batch_size = in_tensor->dimension(0);
uint32_t input_size = in_tensor->dimension(1);
- Softmax2D(in_ptr, input_size, batch_size, beta, out_ptr);
+ nnfw::cker::Softmax(in_ptr, input_size, batch_size, beta, out_ptr);
}
else if (in_tensor->num_dimensions() == 4)
{
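
The Softmax hunk above swaps the hand-rolled Softmax2D loop for nnfw::cker::Softmax; both compute, per row of length input_size, the max-shifted softmax restated below (the per-row maximum is subtracted only to keep the exponentials in range and does not change the result):

  \mathrm{out}_i \;=\; \frac{\exp\!\big(\beta\,(\mathrm{in}_i - \max_j \mathrm{in}_j)\big)}{\sum_k \exp\!\big(\beta\,(\mathrm{in}_k - \max_j \mathrm{in}_j)\big)}
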
diff --git a/runtime/onert/core/src/ir/Graph.cc b/runtime/onert/core/src/ir/Graph.cc
index 0db9b6133..fe8b1b443 100644
--- a/runtime/onert/core/src/ir/Graph.cc
+++ b/runtime/onert/core/src/ir/Graph.cc
@@ -56,18 +56,34 @@ void Graph::setOperandValue(const OperandIndex &ind, std::shared_ptr<Data> data)
_operands.at(ind).data(std::move(data));
}
-void Graph::addInput(const OperandIndex &ind)
+void Graph::addInput(const OperandIndex &ind, const std::string &name)
{
assert(isBuildingPhase());
+ if (!name.empty())
+ _name_to_input.emplace(name, IOIndex{_inputs.size()});
_inputs.append(ind);
}
-void Graph::addOutput(const OperandIndex &ind)
+void Graph::addOutput(const OperandIndex &ind, const std::string &name)
{
assert(isBuildingPhase());
+ if (!name.empty())
+ _name_to_output.emplace(name, IOIndex{_outputs.size()});
_outputs.append(ind);
}
+IOIndex Graph::getInputIndex(const std::string &name) const
+{
+ auto itr = _name_to_input.find(name);
+ return (itr == _name_to_input.end()) ? IOIndex{} : itr->second;
+}
+
+IOIndex Graph::getOutputIndex(const std::string &name) const
+{
+ auto itr = _name_to_output.find(name);
+ return (itr == _name_to_output.end()) ? IOIndex{} : itr->second;
+}
+
void Graph::finishBuilding(void)
{
assert(isBuildingPhase());
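
The Graph change lets a frontend register inputs and outputs under a string name and resolve them later; a lookup miss returns a default-constructed IOIndex rather than throwing. A self-contained sketch of that shape with std::unordered_map and a minimal IOIndex stand-in (not the onert Graph API):

#include <cstdint>
#include <cstdio>
#include <string>
#include <unordered_map>
#include <vector>

// Minimal stand-in for onert's IOIndex: default-constructed means "not found".
struct IOIndex
{
  uint32_t value;
  IOIndex() : value{UINT32_MAX} {}
  explicit IOIndex(uint32_t v) : value{v} {}
  bool valid() const { return value != UINT32_MAX; }
};

class Graph
{
public:
  void addInput(uint32_t operand, const std::string &name = "")
  {
    if (!name.empty())
      _name_to_input.emplace(name, IOIndex{static_cast<uint32_t>(_inputs.size())});
    _inputs.push_back(operand);
  }

  IOIndex getInputIndex(const std::string &name) const
  {
    auto itr = _name_to_input.find(name);
    return (itr == _name_to_input.end()) ? IOIndex{} : itr->second;
  }

private:
  std::vector<uint32_t> _inputs;
  std::unordered_map<std::string, IOIndex> _name_to_input;
};

int main()
{
  Graph g;
  g.addInput(/*operand=*/7, "image");
  std::printf("image -> %u\n", static_cast<unsigned>(g.getInputIndex("image").value)); // 0
  std::printf("missing valid? %d\n", g.getInputIndex("oops").valid());                 // 0
  return 0;
}
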
diff --git a/runtime/onert/core/src/ir/GraphIterator.cc b/runtime/onert/core/src/ir/GraphIterator.cc
index 2b29a9ea9..4bea1a55d 100644
--- a/runtime/onert/core/src/ir/GraphIterator.cc
+++ b/runtime/onert/core/src/ir/GraphIterator.cc
@@ -17,7 +17,7 @@
#include "GraphIterator.h"
#include "ir/OperationIndexMap.h"
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
namespace onert
{
diff --git a/runtime/onert/core/src/ir/GraphIterator.h b/runtime/onert/core/src/ir/GraphIterator.h
index 534ffef80..b54314e0e 100644
--- a/runtime/onert/core/src/ir/GraphIterator.h
+++ b/runtime/onert/core/src/ir/GraphIterator.h
@@ -23,12 +23,19 @@
namespace onert
{
+namespace compiler
+{
+class LoweredGraph;
+} // namespace compiler
+} // namespace onert
+
+namespace onert
+{
namespace ir
{
class Graph;
class Operation;
-class LoweredGraph;
class OpSequence;
template <bool is_const> class Iterator
@@ -65,7 +72,8 @@ public:
using NodeRef = typename Iterator<is_const>::NodeRef;
using IterFn = typename Iterator<is_const>::IterFn;
using LoweredGraphRef =
- typename std::conditional<is_const, const LoweredGraph &, LoweredGraph &>::type;
+ typename std::conditional<is_const, const typename compiler::LoweredGraph &,
+ typename compiler::LoweredGraph &>::type;
using OpSequenceRef = typename std::conditional<is_const, const OpSequence &, OpSequence &>::type;
using OpSeqIndexRef = const OpSequenceIndex &;
using OpSeqIterFn = std::function<void(OpSeqIndexRef, OpSequenceRef)>;
diff --git a/runtime/onert/core/src/ir/OpSequences.cc b/runtime/onert/core/src/ir/OpSequences.cc
index a87d31a9f..68884783e 100644
--- a/runtime/onert/core/src/ir/OpSequences.cc
+++ b/runtime/onert/core/src/ir/OpSequences.cc
@@ -83,15 +83,6 @@ OpSequenceIndex OpSequences::getOperation(const OperationIndex &operation_index)
return ret;
}
-// TODO: Extract this into external helper function
-void OpSequences::dump(const std::string &msg, const Operations &operations) const
-{
- VERBOSE(OpSequences) << "OpSequences(" << msg << ")" << std::endl;
- iterate([&](const OpSequenceIndex &idx, const OpSequence &op_seq) {
- VERBOSE(OpSequences) << idx.value() << "] " << getStrFromOpSeq(op_seq, operations) << std::endl;
- });
-}
-
void OpSequences::removeFromOpSequence(const OperationIndex &operation_index)
{
const auto op_seq_index = findOperation(operation_index);
@@ -122,5 +113,12 @@ OpSequenceIndex OpSequences::findOperation(const OperationIndex &operation_index
throw std::runtime_error("Operation not found");
}
+void dumpOpSequences(const OpSequences &op_seqs, const Operations &operations)
+{
+ op_seqs.iterate([&](const OpSequenceIndex &idx, const OpSequence &op_seq) {
+ VERBOSE(OpSequences) << idx.value() << "] " << getStrFromOpSeq(op_seq, operations) << std::endl;
+ });
+}
+
} // namespace ir
} // namespace onert
diff --git a/runtime/onert/core/src/ir/OperationDumper.cc b/runtime/onert/core/src/ir/OperationDumper.cc
index e3cbce57a..48361f464 100644
--- a/runtime/onert/core/src/ir/OperationDumper.cc
+++ b/runtime/onert/core/src/ir/OperationDumper.cc
@@ -27,206 +27,137 @@ namespace ir
using namespace operation;
-OperationDumper::OperationDumper(const std::string &start_msg)
+namespace
{
- VERBOSE(LIR) << start_msg << std::endl;
-}
-
-void OperationDumper::visit(const Abs &node)
+void dumpUnaryInputOp(const Operation &node, const std::string &adding_input = "")
{
- VERBOSE(LIR) << "* Abs" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Abs::Input::INPUT) << ")"
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(0) << ") " << adding_input
<< std::endl;
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const Add &node)
+void dumpBinaryInputOp(const Operation &node, const std::string &adding_input = "")
{
- VERBOSE(LIR) << "* Add" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Add::Input::LHS) << ", "
- << node.getInputs().at(Add::Input::RHS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const ArgMax &node)
-{
- VERBOSE(LIR) << "* ArgMax" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ArgMax::Input::INPUT) << ")"
- << std::endl;
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(0) << ", " << node.getInputs().at(0)
+ << ") " << adding_input << std::endl;
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const AvgPool2D &node)
+void dumpConvOp(const Operation &node, const std::string &padding_type)
{
- VERBOSE(LIR) << "* AvgPool2D(Implicit)" << std::endl;
- VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(AvgPool2D::Input::INPUT) << ")"
- << std::endl;
+ VERBOSE(LIR) << "* " << node.name() << "(" << padding_type << ")" << std::endl;
+ VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(Conv2D::Input::INPUT) << ") Kernel("
+ << node.getInputs().at(Conv2D::Input::KERNEL) << ") Bias("
+ << node.getInputs().at(Conv2D::Input::BIAS) << ")" << std::endl;
VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const BatchToSpaceND &node)
+void dumpPackingOp(const Operation &node)
{
- VERBOSE(LIR) << "* BatchToSpaceND" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(BatchToSpaceND::Input::INPUT) << ")"
- << " BlockSize(" << node.getInputs().at(BatchToSpaceND::Input::BLOCK_SIZE) << ")"
- << std::endl;
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
+ std::string inputs;
+ for (auto i : node.getInputs())
+ {
+ inputs += std::to_string(i.value()) + ",";
+ }
+ VERBOSE(LIR) << " - Inputs : Inputs(" << inputs << ")" << std::endl;
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
+} // namespace
-void OperationDumper::visit(const operation::BroadcastTo &node)
+OperationDumper::OperationDumper(const std::string &start_msg)
{
- VERBOSE(LIR) << "* BroadcastTo" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(BroadcastTo::Input::INPUT) << ", "
- << node.getInputs().at(BroadcastTo::Input::SHAPE) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ VERBOSE(LIR) << start_msg << std::endl;
}
-void OperationDumper::visit(const Cast &node)
-{
- VERBOSE(LIR) << "* Cast" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Cast::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const ArgMax &node) { dumpUnaryInputOp(node); }
-void OperationDumper::visit(const Comparison &node)
+void OperationDumper::visit(const BatchToSpaceND &node)
{
- VERBOSE(LIR) << "* Comparison" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Comparison::Input::INPUT0) << ", "
- << node.getInputs().at(Comparison::Input::INPUT1) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string block_size =
+ "BlockSize(" +
+ std::to_string(node.getInputs().at(BatchToSpaceND::Input::BLOCK_SIZE).value()) + ")";
+ dumpUnaryInputOp(node, block_size);
}
-void OperationDumper::visit(const Concat &node)
-{
- VERBOSE(LIR) << "* Concat" << std::endl;
- std::string inputs;
- for (auto i : node.getInputs())
- {
- inputs += std::to_string(i.value()) + ",";
- }
- VERBOSE(LIR) << " - Inputs : IFM(" << inputs << ")" << std::endl;
- VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const BinaryArithmetic &node) { dumpBinaryInputOp(node); }
+
+void OperationDumper::visit(const operation::BroadcastTo &node) { dumpBinaryInputOp(node); }
+
+void OperationDumper::visit(const Comparison &node) { dumpBinaryInputOp(node); }
+
+void OperationDumper::visit(const Concat &node) { dumpPackingOp(node); }
void OperationDumper::visit(const Conv2D &node)
{
std::string padding_type =
node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
- VERBOSE(LIR) << "* Conv2D(" << padding_type << ")" << std::endl;
- VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(Conv2D::Input::INPUT) << ") Kernel("
- << node.getInputs().at(Conv2D::Input::KERNEL) << ") Bias("
- << node.getInputs().at(Conv2D::Input::BIAS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
+ dumpConvOp(node, padding_type);
}
-void OperationDumper::visit(const ConvertFp16ToFp32 &node)
-{
- VERBOSE(LIR) << "* ConvertFp16ToFp32" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ConvertFp16ToFp32::Input::INPUT)
- << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const ConvertFp16ToFp32 &node) { dumpUnaryInputOp(node); }
-void OperationDumper::visit(const ConvertFp32ToFp16 &node)
-{
- VERBOSE(LIR) << "* ConvertFp32ToFp16" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ConvertFp32ToFp16::Input::INPUT)
- << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Cos &node)
-{
- VERBOSE(LIR) << "* Cos" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Cos::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const ConvertFp32ToFp16 &node) { dumpUnaryInputOp(node); }
-void OperationDumper::visit(const DepthToSpace &node)
-{
- VERBOSE(LIR) << "* DepthToSpace" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(DepthToSpace::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const DepthToSpace &node) { dumpUnaryInputOp(node); }
void OperationDumper::visit(const DepthwiseConv2D &node)
{
std::string padding_type =
node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
- VERBOSE(LIR) << "* DepthwiseConv2D(" << padding_type << ")" << std::endl;
- VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(DepthwiseConv2D::Input::INPUT)
- << ") Kernel(" << node.getInputs().at(DepthwiseConv2D::Input::KERNEL) << ") Bias("
- << node.getInputs().at(DepthwiseConv2D::Input::BIAS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
+ dumpConvOp(node, padding_type);
}
-void OperationDumper::visit(const Dequantize &node)
+void OperationDumper::visit(const ElementwiseActivation &node)
{
- VERBOSE(LIR) << "* Dequantize" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Dequantize::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string params;
+ if (node.param().op_type == ElementwiseActivation::Type::RELU)
+ {
+ params = " lower value(" + std::to_string(node.param().alpha) + ") upper value(" +
+ std::to_string(node.param().beta) + ")";
+ }
+ else if (node.param().op_type == ElementwiseActivation::Type::LEAKY_RELU)
+ {
+ params = " alpha value(" + std::to_string(node.param().alpha) + ")";
+ }
+ dumpUnaryInputOp(node, params);
}
-void OperationDumper::visit(const Div &node)
-{
- VERBOSE(LIR) << "* Div" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Div::Input::LHS) << ", "
- << node.getInputs().at(Div::Input::RHS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const ElementwiseBinary &node) { dumpBinaryInputOp(node); }
+
+void OperationDumper::visit(const ElementwiseUnary &node) { dumpUnaryInputOp(node); }
void OperationDumper::visit(const EmbeddingLookup &node)
{
- VERBOSE(LIR) << "* EmbeddingLookup" << std::endl;
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
VERBOSE(LIR) << " - Inputs : Lookups(" << node.getInputs().at(EmbeddingLookup::Input::LOOKUPS)
<< ") VALUES(" << node.getInputs().at(EmbeddingLookup::Input::VALUES) << ")"
<< std::endl;
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const Exp &node)
-{
- VERBOSE(LIR) << "* Exp" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Exp::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
void OperationDumper::visit(const ExpandDims &node)
{
- VERBOSE(LIR) << "* ExpandDims" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ExpandDims::Input::INPUT)
- << ") AXIS(" << node.getInputs().at(ExpandDims::Input::AXIS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Floor &node)
-{
- VERBOSE(LIR) << "* Floor" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Floor::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string axis =
+ "AXIS(" + std::to_string(node.getInputs().at(ExpandDims::Input::AXIS).value()) + ")";
+ dumpUnaryInputOp(node, axis);
}
void OperationDumper::visit(const FullyConnected &node)
{
- VERBOSE(LIR) << "* FullyConnected" << std::endl;
- VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(FullyConnected::Input::INPUT)
- << ") Weight(" << node.getInputs().at(FullyConnected::Input::WEIGHT) << ") Bias("
- << node.getInputs().at(FullyConnected::Input::BIAS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string inputs =
+ "Weight(" + std::to_string(node.getInputs().at(FullyConnected::Input::WEIGHT).value()) +
+ ") Bias(" + std::to_string(node.getInputs().at(FullyConnected::Input::BIAS).value()) + ")";
+ dumpUnaryInputOp(node, inputs);
}
void OperationDumper::visit(const Gather &node)
{
- VERBOSE(LIR) << "* Gather" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Gather::Input::INPUT) << ") Indices("
- << node.getInputs().at(Gather::Input::INDICES) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string indices =
+ "Indices(" + std::to_string(node.getInputs().at(Gather::Input::INDICES).value()) + ")";
+ dumpUnaryInputOp(node, indices);
}
void OperationDumper::visit(const HashtableLookup &node)
@@ -242,36 +173,15 @@ void OperationDumper::visit(const HashtableLookup &node)
void OperationDumper::visit(const InstanceNorm &node)
{
- VERBOSE(LIR) << "* InstanceNorm" << std::endl;
- VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(InstanceNorm::Input::INPUT)
- << ") Gamma(" << node.getInputs().at(InstanceNorm::Input::GAMMA) << ") Beta("
- << node.getInputs().at(InstanceNorm::Input::BETA) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const L2Normalization &node)
-{
- VERBOSE(LIR) << "* L2Normalization" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(L2Normalization::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string inputs =
+ "Gamma(" + std::to_string(node.getInputs().at(InstanceNorm::Input::GAMMA).value()) +
+ ") Beta(" + std::to_string(node.getInputs().at(InstanceNorm::Input::BETA).value()) + ")";
+ dumpUnaryInputOp(node, inputs);
}
-void OperationDumper::visit(const L2Pool2D &node)
-{
- VERBOSE(LIR) << "* L2Pool2D" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(L2Pool2D::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const L2Normalization &node) { dumpUnaryInputOp(node); }
-void OperationDumper::visit(const LocalResponseNormalization &node)
-{
- VERBOSE(LIR) << "* LocalResponseNormalization" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input("
- << node.getInputs().at(LocalResponseNormalization::Input::INPUT) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const LocalResponseNormalization &node) { dumpUnaryInputOp(node); }
void OperationDumper::visit(const LSTM &node)
{
@@ -307,93 +217,12 @@ void OperationDumper::visit(const LSTM &node)
<< node.getInputs().at(LSTM::Output::OUTPUT) << ")" << std::endl;
}
-void OperationDumper::visit(const Log &node)
-{
- VERBOSE(LIR) << "* Log" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Log::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const LogicalAnd &node)
-{
- VERBOSE(LIR) << "* LogicalAnd" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(LogicalAnd::Input::INPUT0) << ", "
- << node.getInputs().at(LogicalAnd::Input::INPUT1) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const LogicalNot &node)
-{
- VERBOSE(LIR) << "* LogicalNot" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(LogicalNot::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const LogicalOr &node)
-{
- VERBOSE(LIR) << "* LogicalOr" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(LogicalOr::Input::INPUT0) << ", "
- << node.getInputs().at(LogicalOr::Input::INPUT1) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Logistic &node)
-{
- VERBOSE(LIR) << "* Logistic" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Logistic::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const MaxPool2D &node)
-{
- std::string padding_type =
- node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
- VERBOSE(LIR) << "* MaxPool2D(" << padding_type << ")" << std::endl;
- VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(MaxPool2D::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Mul &node)
-{
- VERBOSE(LIR) << "* Mul" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Mul::Input::LHS) << ", "
- << node.getInputs().at(Mul::Input::RHS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Neg &node)
-{
- VERBOSE(LIR) << "* Neg" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Neg::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Pack &node)
-{
- VERBOSE(LIR) << "* Pack" << std::endl;
- std::string inputs;
- const auto &input_indices = node.getInputs();
- for (auto it = std::begin(input_indices); it != std::end(input_indices); ++it)
- {
- inputs += std::to_string(it->value());
- if (std::next(it) != std::end(input_indices))
- inputs += ", ";
- }
- VERBOSE(LIR) << " - Inputs : Inputs(" << inputs << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const Pack &node) { dumpPackingOp(node); }
void OperationDumper::visit(const Pad &node)
{
- VERBOSE(LIR) << "* Pad" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Pad::Input::INPUT) << ") Pad("
- << node.getInputs().at(Pad::Input::PAD) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string pad = "Pad(" + std::to_string(node.getInputs().at(Pad::Input::PAD).value()) + ")";
+ dumpUnaryInputOp(node, pad);
}
void OperationDumper::visit(const Permute &node)
@@ -417,86 +246,46 @@ void OperationDumper::visit(const Permute &node)
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const Pow &node)
+void OperationDumper::visit(const Pool2D &node)
{
- VERBOSE(LIR) << "* Pow" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Pow::Input::LHS) << ", "
- << node.getInputs().at(Pow::Input::RHS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const PReLU &node)
-{
- VERBOSE(LIR) << "* PReLU" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(PReLU::Input::INPUT) << ") Alpha("
- << node.getInputs().at(PReLU::Input::ALPHA) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Reduce &node)
-{
- VERBOSE(LIR) << "* " + node.name() << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Reduce::Input::INPUT) << ")"
+ std::string padding_type =
+ node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
+ VERBOSE(LIR) << "* " << node.name() << "(" << padding_type << ")" << std::endl;
+ VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(Pool2D::Input::INPUT) << ")"
<< std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const ReLU &node)
-{
- VERBOSE(LIR) << "* ReLU" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ReLU::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const Pow &node) { dumpBinaryInputOp(node); }
-void OperationDumper::visit(const ReLU1 &node)
+void OperationDumper::visit(const PReLU &node)
{
- VERBOSE(LIR) << "* ReLU1" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ReLU1::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string alpha =
+ "Alpha(" + std::to_string(node.getInputs().at(PReLU::Input::ALPHA).value()) + ")";
+ dumpUnaryInputOp(node, alpha);
}
-void OperationDumper::visit(const ReLU6 &node)
-{
- VERBOSE(LIR) << "* ReLU6" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ReLU6::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const Rank &node) { dumpUnaryInputOp(node); }
+
+void OperationDumper::visit(const Reduce &node) { dumpUnaryInputOp(node); }
void OperationDumper::visit(const Reshape &node)
{
- VERBOSE(LIR) << "* Reshape" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Reshape::Input::INPUT) << ")";
// optional param
- if (node.getInputs().size() == 2)
- {
- VERBOSE(LIR) << " Shape(" << node.getInputs().at(Reshape::Input::SHAPE) << ")";
- }
- else
- {
- VERBOSE(LIR) << " Shape(not provided)";
- }
- VERBOSE(LIR) << std::endl;
-
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string shape =
+ node.getInputs().size() == 2
+ ? "Shape(" + std::to_string(node.getInputs().at(Reshape::Input::SHAPE).value()) + ")"
+ : "Shape(not provided)";
+ dumpUnaryInputOp(node, shape);
}
-void OperationDumper::visit(const ResizeBilinear &node)
-{
- VERBOSE(LIR) << "* ResizeBilinear" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ResizeBilinear::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const ResizeBilinear &node) { dumpUnaryInputOp(node); }
void OperationDumper::visit(const Reverse &node)
{
- VERBOSE(LIR) << "* Reverse" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Reverse::Input::INPUT) << ") Axis("
- << node.getInputs().at(Reverse::Input::AXIS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string axis =
+ "Axis(" + std::to_string(node.getInputs().at(Reverse::Input::AXIS).value()) + ")";
+ dumpUnaryInputOp(node, axis);
}
void OperationDumper::visit(const RNN &node)
@@ -512,162 +301,65 @@ void OperationDumper::visit(const RNN &node)
<< std::endl;
}
-void OperationDumper::visit(const Round &node)
-{
- VERBOSE(LIR) << "* Round" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Round::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
void OperationDumper::visit(const Range &node)
{
VERBOSE(LIR) << "* Range" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Range::Input::START) << ")"
+ VERBOSE(LIR) << " - Inputs : Start(" << node.getInputs().at(Range::Input::START) << ")"
<< " Limit(" << node.getInputs().at(Range::Input::LIMIT) << ")"
<< " Delta(" << node.getInputs().at(Range::Input::DELTA) << ")" << std::endl;
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const RSQRT &node)
-{
- VERBOSE(LIR) << "* RSQRT" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(RSQRT::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
void OperationDumper::visit(const Select &node)
{
VERBOSE(LIR) << "* Select" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Select::Input::CONDITION) << ")"
+ VERBOSE(LIR) << " - Inputs : Condition(" << node.getInputs().at(Select::Input::CONDITION) << ")"
<< " Input_X(" << node.getInputs().at(Select::Input::INPUT_TRUE) << ")"
<< " Input_Y(" << node.getInputs().at(Select::Input::INPUT_FALSE) << ")"
<< std::endl;
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const ir::operation::Shape &node)
-{
- VERBOSE(LIR) << "* Shape" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ir::operation::Shape::Input::INPUT)
- << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Sin &node)
-{
- VERBOSE(LIR) << "* Sin" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Sin::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const ir::operation::Shape &node) { dumpUnaryInputOp(node); }
-void OperationDumper::visit(const Softmax &node)
-{
- VERBOSE(LIR) << "* Softmax" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Softmax::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const Softmax &node) { dumpUnaryInputOp(node); }
void OperationDumper::visit(const SpaceToBatchND &node)
{
- VERBOSE(LIR) << "* SpaceToBatchND" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(SpaceToBatchND::Input::INPUT)
- << ") BlockSize(" << node.getInputs().at(SpaceToBatchND::Input::BLOCK_SIZE)
- << ") Paddings(" << node.getInputs().at(SpaceToBatchND::Input::PADDINGS) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string inputs =
+ "BlockSize(" +
+ std::to_string(node.getInputs().at(SpaceToBatchND::Input::BLOCK_SIZE).value()) +
+ ") Paddings(" + std::to_string(node.getInputs().at(SpaceToBatchND::Input::PADDINGS).value()) +
+ ")";
+ dumpUnaryInputOp(node, inputs);
}
-void OperationDumper::visit(const SpaceToDepth &node)
-{
- VERBOSE(LIR) << "* SpaceToDepth" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(SpaceToDepth::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const SpaceToDepth &node) { dumpUnaryInputOp(node); }
-void OperationDumper::visit(const Split &node)
-{
- VERBOSE(LIR) << "* Split" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Split::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const SQRT &node)
-{
- VERBOSE(LIR) << "* SQRT" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(SQRT::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const Split &node) { dumpUnaryInputOp(node); }
-void OperationDumper::visit(const SquaredDifference &node)
-{
- VERBOSE(LIR) << "* SquaredDifference" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(SquaredDifference::Input::LHS)
- << ", " << node.getInputs().at(SquaredDifference::Input::RHS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const SquaredDifference &node) { dumpBinaryInputOp(node); }
void OperationDumper::visit(const StatelessRandomUniform &node)
{
VERBOSE(LIR) << "* StatelessRandomUniform" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(StatelessRandomUniform::Input::SHAPE)
- << ", " << node.getInputs().at(StatelessRandomUniform::Input::SEED) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Squeeze &node)
-{
- VERBOSE(LIR) << "* Squeeze" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Squeeze::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Slice &node)
-{
- VERBOSE(LIR) << "* Slice" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Slice::Input::INPUT) << ")"
+ VERBOSE(LIR) << " - Inputs : Shape(" << node.getInputs().at(StatelessRandomUniform::Input::SHAPE)
+ << " Seed(" << node.getInputs().at(StatelessRandomUniform::Input::SEED) << ")"
<< std::endl;
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const StridedSlice &node)
-{
- VERBOSE(LIR) << "* StridedSlice" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(StridedSlice::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const Squeeze &node) { dumpUnaryInputOp(node); }
-void OperationDumper::visit(const Sub &node)
-{
- VERBOSE(LIR) << "* Sub" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Sub::Input::LHS) << ", "
- << node.getInputs().at(Sub::Input::RHS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const Slice &node) { dumpUnaryInputOp(node); }
-void OperationDumper::visit(const Tanh &node)
-{
- VERBOSE(LIR) << "* TanH" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Tanh::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const StridedSlice &node) { dumpUnaryInputOp(node); }
void OperationDumper::visit(const Tile &node)
{
- VERBOSE(LIR) << "* Tile" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Tile::Input::INPUT) << ", "
- << node.getInputs().at(Tile::Input::MULTIPLES) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ std::string multiples =
+ "Multiples(" + std::to_string(node.getInputs().at(Tile::Input::MULTIPLES).value()) + ")";
+ dumpUnaryInputOp(node, multiples);
}
void OperationDumper::visit(const TopKV2 &node)
@@ -692,17 +384,11 @@ void OperationDumper::visit(const TransposeConv &node)
VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const Transpose &node)
-{
- VERBOSE(LIR) << "* Transpose" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Transpose::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const Transpose &node) { dumpUnaryInputOp(node); }
void OperationDumper::visit(const Unpack &node)
{
- VERBOSE(LIR) << "* Unpack" << std::endl;
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Unpack::Input::INPUT) << ")"
<< std::endl;
std::string outputs;
@@ -716,25 +402,9 @@ void OperationDumper::visit(const Unpack &node)
VERBOSE(LIR) << " - Outputs : Outputs(" << outputs << ")" << std::endl;
}
-void OperationDumper::visit(const Min &node)
-{
- VERBOSE(LIR) << "* Min" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Min::Input::LHS) << ", "
- << node.getInputs().at(Min::Input::RHS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Max &node)
-{
- VERBOSE(LIR) << "* Max" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Max::Input::LHS) << ", "
- << node.getInputs().at(Max::Input::RHS) << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
void OperationDumper::visit(const OneHot &node)
{
- VERBOSE(LIR) << "* OneHot" << std::endl;
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
VERBOSE(LIR) << " - Inputs : "
<< "Indices(" << node.getInputs().at(OneHot::Input::INDICES) << ") " << std::endl;
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
@@ -742,7 +412,7 @@ void OperationDumper::visit(const OneHot &node)
void OperationDumper::visit(const If &node)
{
- VERBOSE(LIR) << "* If" << std::endl;
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
std::string inputs;
const auto &input_indices = node.getInputs();
for (auto it = std::begin(input_indices); it != std::end(input_indices); ++it)
@@ -767,7 +437,7 @@ void OperationDumper::visit(const If &node)
void OperationDumper::visit(const While &node)
{
- VERBOSE(LIR) << "* While" << std::endl;
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
std::string inputs;
const auto &input_indices = node.getInputs();
for (auto it = std::begin(input_indices); it != std::end(input_indices); ++it)
@@ -790,13 +460,5 @@ void OperationDumper::visit(const While &node)
VERBOSE(LIR) << " - Output : Outputs(" << outputs << ")" << std::endl;
}
-void OperationDumper::visit(const ZerosLike &node)
-{
- VERBOSE(LIR) << "* RoZerosLike" << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ZerosLike::Input::INPUT) << ")"
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
} // namespace ir
} // namespace onert
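
The OperationDumper changes above fold dozens of near-identical visit() bodies into shared helpers (dumpUnaryInputOp, dumpBinaryInputOp, dumpPackingOp) that are defined earlier in OperationDumper.cc and are not shown in this hunk. Below is a minimal, self-contained sketch of that pattern only; the struct and signature are illustrative assumptions, not the actual onert types.

#include <iostream>
#include <string>
#include <vector>

// Stand-in for ir::Operation, for illustration only.
struct OpSketch
{
  std::string name;
  std::vector<int> inputs;   // operand indices
  std::vector<int> outputs;  // operand indices
};

// Mirrors the dump layout used above: "* <name>", the first input plus an
// optional extra-operand description, then the first output.
void dumpUnaryInputOpSketch(const OpSketch &op, const std::string &adding_input = "")
{
  std::cout << "* " << op.name << '\n';
  std::cout << "  - Inputs : Input(" << op.inputs.at(0) << ") " << adding_input << '\n';
  std::cout << "  - Output : Output(" << op.outputs.at(0) << ")\n";
}

int main()
{
  // Roughly how the Reshape visitor above would use it, with "Shape(7)" as the extra string.
  dumpUnaryInputOpSketch({"Reshape", {3, 7}, {9}}, "Shape(7)");
  return 0;
}
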
diff --git a/runtime/onert/core/src/ir/OperationDumper.h b/runtime/onert/core/src/ir/OperationDumper.h
index d83f1493f..e8ab3b3cd 100644
--- a/runtime/onert/core/src/ir/OperationDumper.h
+++ b/runtime/onert/core/src/ir/OperationDumper.h
@@ -31,85 +31,61 @@ public:
OperationDumper(const std::string &start_msg);
public:
- void visit(const operation::Abs &) override;
- void visit(const operation::Add &node) override;
void visit(const operation::ArgMax &) override;
- void visit(const operation::AvgPool2D &node) override;
void visit(const operation::BatchToSpaceND &node) override;
+ void visit(const operation::BinaryArithmetic &node) override;
void visit(const operation::BroadcastTo &) override;
- void visit(const operation::Cast &) override;
void visit(const operation::Comparison &) override;
void visit(const operation::Concat &node) override;
void visit(const operation::Conv2D &node) override;
void visit(const operation::ConvertFp16ToFp32 &node) override;
void visit(const operation::ConvertFp32ToFp16 &node) override;
- void visit(const operation::Cos &node) override;
void visit(const operation::DepthToSpace &) override;
void visit(const operation::DepthwiseConv2D &node) override;
- void visit(const operation::Dequantize &) override;
- void visit(const operation::Div &) override;
+ void visit(const operation::ElementwiseActivation &) override;
+ void visit(const operation::ElementwiseBinary &) override;
+ void visit(const operation::ElementwiseUnary &) override;
void visit(const operation::EmbeddingLookup &) override;
- void visit(const operation::Exp &) override;
void visit(const operation::ExpandDims &) override;
- void visit(const operation::Floor &) override;
void visit(const operation::FullyConnected &node) override;
void visit(const operation::Gather &) override;
void visit(const operation::HashtableLookup &) override;
void visit(const operation::InstanceNorm &) override;
void visit(const operation::L2Normalization &) override;
- void visit(const operation::L2Pool2D &) override;
void visit(const operation::LocalResponseNormalization &) override;
- void visit(const operation::Log &) override;
- void visit(const operation::LogicalAnd &) override;
- void visit(const operation::LogicalNot &) override;
- void visit(const operation::LogicalOr &) override;
- void visit(const operation::Logistic &) override;
void visit(const operation::LSTM &) override;
- void visit(const operation::MaxPool2D &node) override;
- void visit(const operation::Mul &) override;
- void visit(const operation::Neg &) override;
void visit(const operation::Pack &) override;
void visit(const operation::Pad &) override;
void visit(const operation::Permute &node) override;
+ void visit(const operation::Pool2D &node) override;
void visit(const operation::Pow &node) override;
void visit(const operation::PReLU &) override;
void visit(const operation::Range &) override;
+ void visit(const operation::Rank &) override;
void visit(const operation::Reduce &) override;
- void visit(const operation::ReLU &) override;
- void visit(const operation::ReLU1 &) override;
- void visit(const operation::ReLU6 &) override;
void visit(const operation::Reshape &node) override;
void visit(const operation::ResizeBilinear &) override;
void visit(const operation::Reverse &) override;
void visit(const operation::RNN &) override;
- void visit(const operation::Round &) override;
- void visit(const operation::RSQRT &) override;
void visit(const operation::Select &node) override;
void visit(const operation::Shape &node) override;
- void visit(const operation::Sin &node) override;
void visit(const operation::Softmax &node) override;
void visit(const operation::SpaceToBatchND &) override;
void visit(const operation::SpaceToDepth &) override;
void visit(const operation::Split &) override;
- void visit(const operation::SQRT &) override;
void visit(const operation::SquaredDifference &) override;
void visit(const operation::Squeeze &) override;
void visit(const operation::Slice &) override;
void visit(const operation::StridedSlice &) override;
void visit(const operation::StatelessRandomUniform &) override;
- void visit(const operation::Sub &) override;
- void visit(const operation::Tanh &) override;
void visit(const operation::Tile &) override;
void visit(const operation::TopKV2 &) override;
void visit(const operation::TransposeConv &) override;
void visit(const operation::Transpose &) override;
void visit(const operation::Unpack &) override;
- void visit(const operation::Min &) override;
- void visit(const operation::Max &) override;
void visit(const operation::OneHot &) override;
void visit(const operation::If &) override;
void visit(const operation::While &) override;
- void visit(const operation::ZerosLike &) override;
};
} // namespace ir
diff --git a/runtime/onert/core/src/ir/Padding.cc b/runtime/onert/core/src/ir/Padding.cc
index 31969911f..d74f80217 100644
--- a/runtime/onert/core/src/ir/Padding.cc
+++ b/runtime/onert/core/src/ir/Padding.cc
@@ -50,7 +50,7 @@ inline ExplicitPadding validPadding(void)
}
inline ExplicitPadding samePaddingUsingIFM(const FeatureShape &ifm_shape, const Stride &stride,
- uint32_t kw, uint32_t kh)
+ uint32_t kw, uint32_t kh, uint32_t dwf, uint32_t dhf)
{
ExplicitPadding padding;
@@ -61,14 +61,19 @@ inline ExplicitPadding samePaddingUsingIFM(const FeatureShape &ifm_shape, const
// padding_to_beginning = total_padding / 2
// padding_to_end = (total_padding + 1)/2.
//
+ const int32_t effective_filter_h_size = (kh - 1) * dhf + 1;
+ const int32_t effective_filter_w_size = (kw - 1) * dwf + 1;
+
const int32_t vertical_expected_output = (ifm_shape.H + stride.vertical - 1) / stride.vertical;
const int32_t horizontal_expected_output =
(ifm_shape.W + stride.horizontal - 1) / stride.horizontal;
- const int32_t vertical_needed_input = (vertical_expected_output - 1) * stride.vertical + kh;
+ const int32_t vertical_needed_input =
+ (vertical_expected_output - 1) * stride.vertical + effective_filter_h_size;
const int32_t vertical_total_padding = std::max(0, vertical_needed_input - ifm_shape.H);
- const int32_t horizontal_needed_input = (horizontal_expected_output - 1) * stride.horizontal + kw;
+ const int32_t horizontal_needed_input =
+ (horizontal_expected_output - 1) * stride.horizontal + effective_filter_w_size;
const int32_t horizontal_total_padding = std::max(0, horizontal_needed_input - ifm_shape.W);
padding.top = vertical_total_padding / 2;
@@ -80,7 +85,8 @@ inline ExplicitPadding samePaddingUsingIFM(const FeatureShape &ifm_shape, const
}
inline ExplicitPadding samePadding(const FeatureShape &ifm_shape, const FeatureShape &ofm_shape,
- const Stride &stride, uint32_t kw, uint32_t kh)
+ const Stride &stride, uint32_t kw, uint32_t kh, uint32_t dwf,
+ uint32_t dhf)
{
const int32_t vertical_expected_output = (ifm_shape.H + stride.vertical - 1) / stride.vertical;
const int32_t horizontal_expected_output =
@@ -92,7 +98,7 @@ inline ExplicitPadding samePadding(const FeatureShape &ifm_shape, const FeatureS
UNUSED_RELEASE(vertical_expected_output);
UNUSED_RELEASE(horizontal_expected_output);
- return samePaddingUsingIFM(ifm_shape, stride, kw, kh);
+ return samePaddingUsingIFM(ifm_shape, stride, kw, kh, dwf, dhf);
}
} // namespace
@@ -130,7 +136,7 @@ Padding::Padding(uint32_t left, uint32_t right, uint32_t top, uint32_t bottom)
const ExplicitPadding calculatePadding(const Padding &padding, const FeatureShape &ifm_shape,
const FeatureShape &ofm_shape, const Stride &stride,
- uint32_t kw, uint32_t kh)
+ uint32_t kw, uint32_t kh, uint32_t dwf, uint32_t dhf)
{
if (padding.type == PaddingType::EXPLICIT)
{
@@ -138,7 +144,7 @@ const ExplicitPadding calculatePadding(const Padding &padding, const FeatureShap
}
else if (padding.type == PaddingType::SAME)
{
- return samePadding(ifm_shape, ofm_shape, stride, kw, kh);
+ return samePadding(ifm_shape, ofm_shape, stride, kw, kh, dwf, dhf);
}
else if (padding.type == PaddingType::VALID)
{
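
The Padding.cc hunk above threads dilation factors (dwf, dhf) into the SAME-padding math; the key step is the effective filter size (k - 1) * d + 1. A standalone numeric sketch of that computation follows, using assumed example values rather than anything taken from this patch.

#include <algorithm>
#include <cstdint>
#include <iostream>

int main()
{
  // Assumed example: 7-row input, vertical stride 2, 3-tap kernel, dilation 2.
  const int32_t ifm_h = 7, stride_v = 2, kh = 3, dhf = 2;

  const int32_t effective_kh = (kh - 1) * dhf + 1;                           // 5
  const int32_t expected_out = (ifm_h + stride_v - 1) / stride_v;            // ceil(7/2) = 4
  const int32_t needed_input = (expected_out - 1) * stride_v + effective_kh; // 11
  const int32_t total_pad = std::max(0, needed_input - ifm_h);               // 4
  const int32_t pad_top = total_pad / 2;                                     // padding_to_beginning = 2
  const int32_t pad_bottom = (total_pad + 1) / 2;                            // padding_to_end = 2

  std::cout << "top=" << pad_top << " bottom=" << pad_bottom << '\n';
  return 0;
}
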
diff --git a/runtime/onert/core/src/ir/operation/Abs.cc b/runtime/onert/core/src/ir/operation/Abs.cc
deleted file mode 100644
index b06705d07..000000000
--- a/runtime/onert/core/src/ir/operation/Abs.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Abs.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Abs::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Abs::Abs(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Add.cc b/runtime/onert/core/src/ir/operation/Add.cc
deleted file mode 100644
index 2fa30f8ed..000000000
--- a/runtime/onert/core/src/ir/operation/Add.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Add.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Add::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Add::Add(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/AvgPool2D.cc b/runtime/onert/core/src/ir/operation/AvgPool2D.cc
deleted file mode 100644
index 28d4fcb54..000000000
--- a/runtime/onert/core/src/ir/operation/AvgPool2D.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/AvgPool2D.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void AvgPool2D::accept(OperationVisitor &v) const { v.visit(*this); }
-
-AvgPool2D::AvgPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Quantize.cc b/runtime/onert/core/src/ir/operation/BinaryArithmetic.cc
index 0e3d5b69b..2b1422c73 100644
--- a/runtime/onert/core/src/ir/operation/Quantize.cc
+++ b/runtime/onert/core/src/ir/operation/BinaryArithmetic.cc
@@ -14,7 +14,10 @@
* limitations under the License.
*/
-#include "ir/operation/Quantize.h"
+#include "ir/operation/BinaryArithmetic.h"
+
+#include <cassert>
+#include <unordered_map>
#include "ir/OperationVisitor.h"
@@ -25,11 +28,23 @@ namespace ir
namespace operation
{
-void Quantize::accept(OperationVisitor &v) const { v.visit(*this); }
+void BinaryArithmetic::accept(OperationVisitor &v) const { v.visit(*this); }
+
+BinaryArithmetic::BinaryArithmetic(const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs, const Param &param)
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+{
+}
-Quantize::Quantize(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+std::string BinaryArithmetic::name() const
{
+ using ArithmeticType = onert::ir::operation::BinaryArithmetic::ArithmeticType;
+ static const std::unordered_map<ArithmeticType, std::string> name_map{
+ {ArithmeticType::ADD, std::string{"Add"}},
+ {ArithmeticType::SUB, std::string{"Sub"}},
+ {ArithmeticType::MUL, std::string{"Mul"}},
+ {ArithmeticType::DIV, std::string{"Div"}}};
+ return name_map.at(_param.arithmetic_type);
}
} // namespace operation
diff --git a/runtime/onert/core/src/ir/operation/Cast.cc b/runtime/onert/core/src/ir/operation/Cast.cc
deleted file mode 100644
index 09d9c327e..000000000
--- a/runtime/onert/core/src/ir/operation/Cast.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Cast.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Cast::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Cast::Cast(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Dequantize.cc b/runtime/onert/core/src/ir/operation/Dequantize.cc
deleted file mode 100644
index 14d6362bd..000000000
--- a/runtime/onert/core/src/ir/operation/Dequantize.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Dequantize.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Dequantize::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Dequantize::Dequantize(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Div.cc b/runtime/onert/core/src/ir/operation/Div.cc
deleted file mode 100644
index b095d9811..000000000
--- a/runtime/onert/core/src/ir/operation/Div.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Div.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Div::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Div::Div(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ElementwiseActivation.cc b/runtime/onert/core/src/ir/operation/ElementwiseActivation.cc
new file mode 100644
index 000000000..f6718b656
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/ElementwiseActivation.cc
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/ElementwiseActivation.h"
+
+#include <cassert>
+#include <unordered_map>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void ElementwiseActivation::accept(OperationVisitor &v) const { v.visit(*this); }
+
+ElementwiseActivation::ElementwiseActivation(const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+{
+ if (param.op_type == Type::LOGISTIC)
+ {
+ assert(param.alpha == 0.0f && param.beta == 0.0f && "Logistic will be supported only as "
+ "sigmoid function(L=1, k=1, x0=0). So, do "
+ "not use alpha and beta");
+ }
+ else if (param.op_type == Type::RELU)
+ {
+ assert(param.alpha >= param.beta && "ReLU's alpha must be equal to or greater than beta");
+ }
+ else if (param.op_type == Type::TANH)
+ {
+ assert(param.alpha == 1.0f && param.beta == 1.0f && "f(x) = alpha * tanh(beta * x), Tanh is "
+ "supported only the values of alpha and "
+ "beta are 1.f");
+ }
+}
+
+std::string ElementwiseActivation::name() const
+{
+ using ElementwiseActivationType = onert::ir::operation::ElementwiseActivation::Type;
+ static const std::unordered_map<Type, std::string> name_map{
+ {ElementwiseActivationType::ELU, "ELU"},
+ {ElementwiseActivationType::LOGISTIC, "Logistic"},
+ {ElementwiseActivationType::RELU, "ReLU"},
+ {ElementwiseActivationType::TANH, "Tanh"},
+ {ElementwiseActivationType::LEAKY_RELU, "LeakyRelu"}};
+ return name_map.at(_param.op_type);
+}
+
+float ElementwiseActivation::infinity = std::numeric_limits<float>::infinity();
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ElementwiseBinary.cc b/runtime/onert/core/src/ir/operation/ElementwiseBinary.cc
new file mode 100644
index 000000000..3287fc0a3
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/ElementwiseBinary.cc
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/ElementwiseBinary.h"
+
+#include <cassert>
+#include <unordered_map>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void ElementwiseBinary::accept(OperationVisitor &v) const { v.visit(*this); }
+
+ElementwiseBinary::ElementwiseBinary(const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs, const Param &param)
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+{
+}
+
+std::string ElementwiseBinary::name() const
+{
+ using ElementwiseBinaryType = onert::ir::operation::ElementwiseBinary::ElementwiseBinaryType;
+ static const std::unordered_map<ElementwiseBinaryType, std::string> name_map{
+ {ElementwiseBinaryType::LOGICAL_AND, std::string{"LogicalAnd"}},
+ {ElementwiseBinaryType::LOGICAL_OR, std::string{"LogicalOr"}},
+ {ElementwiseBinaryType::MAX, std::string{"Max"}},
+ {ElementwiseBinaryType::MIN, std::string{"Min"}}};
+ return name_map.at(_param.op_type);
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc b/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc
new file mode 100644
index 000000000..7dfcd4a98
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/ElementwiseUnary.h"
+
+#include <cassert>
+#include <unordered_map>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void ElementwiseUnary::accept(OperationVisitor &v) const { v.visit(*this); }
+
+ElementwiseUnary::ElementwiseUnary(const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs, const Param &param)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+{
+}
+
+std::string ElementwiseUnary::name() const
+{
+ using ElementwiseUnaryType = onert::ir::operation::ElementwiseUnary::Type;
+ static const std::unordered_map<ElementwiseUnaryType, std::string> name_map{
+ {ElementwiseUnaryType::ABS, std::string{"Abs"}},
+ {ElementwiseUnaryType::CAST, std::string{"Cast"}},
+ {ElementwiseUnaryType::COS, std::string{"Cos"}},
+ {ElementwiseUnaryType::DEQUANTIZE, std::string{"Dequantize"}},
+ {ElementwiseUnaryType::ERF, std::string{"Erf"}},
+ {ElementwiseUnaryType::EXP, std::string{"Exp"}},
+ {ElementwiseUnaryType::FLOOR, std::string{"Floor"}},
+ {ElementwiseUnaryType::LOG, std::string{"Log"}},
+ {ElementwiseUnaryType::LOGICAL_NOT, std::string{"LogicalNot"}},
+ {ElementwiseUnaryType::NEG, std::string{"Neg"}},
+ {ElementwiseUnaryType::QUANTIZE, std::string{"Quantize"}},
+ {ElementwiseUnaryType::ROUND, std::string{"Round"}},
+ {ElementwiseUnaryType::RSQRT, std::string{"RSqrt"}},
+ {ElementwiseUnaryType::SIN, std::string{"Sin"}},
+ {ElementwiseUnaryType::SQRT, std::string{"Sqrt"}},
+ {ElementwiseUnaryType::SQURE, std::string{"Squre"}},
+ {ElementwiseUnaryType::ZEROS_LIKE, std::string{"ZerosLike"}}};
+ return name_map.at(_param.op_type);
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Exp.cc b/runtime/onert/core/src/ir/operation/Exp.cc
deleted file mode 100644
index 0b22e080a..000000000
--- a/runtime/onert/core/src/ir/operation/Exp.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Exp.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Exp::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Exp::Exp(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Floor.cc b/runtime/onert/core/src/ir/operation/Floor.cc
deleted file mode 100644
index dc01535ad..000000000
--- a/runtime/onert/core/src/ir/operation/Floor.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Floor.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Floor::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Floor::Floor(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/L2Pool2D.cc b/runtime/onert/core/src/ir/operation/L2Pool2D.cc
deleted file mode 100644
index 8f21b93e0..000000000
--- a/runtime/onert/core/src/ir/operation/L2Pool2D.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/L2Pool2D.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void L2Pool2D::accept(OperationVisitor &v) const { v.visit(*this); }
-
-L2Pool2D::L2Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/LogicalAnd.cc b/runtime/onert/core/src/ir/operation/LogicalAnd.cc
deleted file mode 100644
index 0d50706ca..000000000
--- a/runtime/onert/core/src/ir/operation/LogicalAnd.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/LogicalAnd.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void LogicalAnd::accept(OperationVisitor &v) const { v.visit(*this); }
-
-LogicalAnd::LogicalAnd(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/LogicalNot.cc b/runtime/onert/core/src/ir/operation/LogicalNot.cc
deleted file mode 100644
index 8f1142102..000000000
--- a/runtime/onert/core/src/ir/operation/LogicalNot.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/LogicalNot.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void LogicalNot::accept(OperationVisitor &v) const { v.visit(*this); }
-
-LogicalNot::LogicalNot(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/LogicalOr.cc b/runtime/onert/core/src/ir/operation/LogicalOr.cc
deleted file mode 100644
index d75207c4a..000000000
--- a/runtime/onert/core/src/ir/operation/LogicalOr.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/LogicalOr.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void LogicalOr::accept(OperationVisitor &v) const { v.visit(*this); }
-
-LogicalOr::LogicalOr(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Logistic.cc b/runtime/onert/core/src/ir/operation/Logistic.cc
deleted file mode 100644
index 77d9d17de..000000000
--- a/runtime/onert/core/src/ir/operation/Logistic.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Logistic.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Logistic::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Logistic::Logistic(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Max.cc b/runtime/onert/core/src/ir/operation/Max.cc
deleted file mode 100644
index 281f9d451..000000000
--- a/runtime/onert/core/src/ir/operation/Max.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Max.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Max::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Max::Max(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/MaxPool2D.cc b/runtime/onert/core/src/ir/operation/MaxPool2D.cc
deleted file mode 100644
index eac53cc5e..000000000
--- a/runtime/onert/core/src/ir/operation/MaxPool2D.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/MaxPool2D.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void MaxPool2D::accept(OperationVisitor &v) const { v.visit(*this); }
-
-MaxPool2D::MaxPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Min.cc b/runtime/onert/core/src/ir/operation/Min.cc
deleted file mode 100644
index 8be7f0cc8..000000000
--- a/runtime/onert/core/src/ir/operation/Min.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Min.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Min::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Min::Min(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Mul.cc b/runtime/onert/core/src/ir/operation/Mul.cc
deleted file mode 100644
index 03cdf1b61..000000000
--- a/runtime/onert/core/src/ir/operation/Mul.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Mul.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Mul::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Mul::Mul(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Neg.cc b/runtime/onert/core/src/ir/operation/Neg.cc
deleted file mode 100644
index df623a13b..000000000
--- a/runtime/onert/core/src/ir/operation/Neg.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Neg.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Neg::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Neg::Neg(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Pad.cc b/runtime/onert/core/src/ir/operation/Pad.cc
index aecc2d994..0c56e92e3 100644
--- a/runtime/onert/core/src/ir/operation/Pad.cc
+++ b/runtime/onert/core/src/ir/operation/Pad.cc
@@ -27,8 +27,10 @@ namespace operation
void Pad::accept(OperationVisitor &v) const { v.visit(*this); }
+// PAD: 2 inputs
+// PADV2: 3 inputs
Pad::Pad(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createInRange(2u, 3u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Sin.cc b/runtime/onert/core/src/ir/operation/Pool2D.cc
index 631505f36..761d14c3d 100644
--- a/runtime/onert/core/src/ir/operation/Sin.cc
+++ b/runtime/onert/core/src/ir/operation/Pool2D.cc
@@ -14,9 +14,10 @@
* limitations under the License.
*/
-#include "ir/operation/Sin.h"
+#include "ir/operation/Pool2D.h"
#include <cassert>
+#include <unordered_map>
#include "ir/OperationVisitor.h"
@@ -27,13 +28,24 @@ namespace ir
namespace operation
{
-void Sin::accept(OperationVisitor &v) const { v.visit(*this); }
+void Pool2D::accept(OperationVisitor &v) const { v.visit(*this); }
-Sin::Sin(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+Pool2D::Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
+std::string Pool2D::name() const
+{
+ using PoolType = onert::ir::operation::Pool2D::PoolType;
+ static const std::unordered_map<PoolType, std::string> name_map{
+ {PoolType::AVG, "Avg" + std::string{toString(opcode())}},
+ {PoolType::L2, "L2" + std::string{toString(opcode())}},
+ {PoolType::MAX, "Max" + std::string{toString(opcode())}}};
+ return name_map.at(_param.op_type);
+}
+
} // namespace operation
} // namespace ir
} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/RSQRT.cc b/runtime/onert/core/src/ir/operation/RSQRT.cc
deleted file mode 100644
index 2bce1fa28..000000000
--- a/runtime/onert/core/src/ir/operation/RSQRT.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/RSQRT.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void RSQRT::accept(OperationVisitor &v) const { v.visit(*this); }
-
-RSQRT::RSQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Cos.cc b/runtime/onert/core/src/ir/operation/Rank.cc
index 831a92dbd..c357e9018 100644
--- a/runtime/onert/core/src/ir/operation/Cos.cc
+++ b/runtime/onert/core/src/ir/operation/Rank.cc
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ir/operation/Cos.h"
+#include "ir/operation/Rank.h"
#include <cassert>
@@ -27,9 +27,9 @@ namespace ir
namespace operation
{
-void Cos::accept(OperationVisitor &v) const { v.visit(*this); }
+void Rank::accept(OperationVisitor &v) const { v.visit(*this); }
-Cos::Cos(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
+Rank::Rank(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
: Operation{OperandConstraint::createExact(1u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/ReLU.cc b/runtime/onert/core/src/ir/operation/ReLU.cc
deleted file mode 100644
index f0c88478b..000000000
--- a/runtime/onert/core/src/ir/operation/ReLU.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/ReLU.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void ReLU::accept(OperationVisitor &v) const { v.visit(*this); }
-
-ReLU::ReLU(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ReLU1.cc b/runtime/onert/core/src/ir/operation/ReLU1.cc
deleted file mode 100644
index 734f0b65b..000000000
--- a/runtime/onert/core/src/ir/operation/ReLU1.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/ReLU1.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void ReLU1::accept(OperationVisitor &v) const { v.visit(*this); }
-
-ReLU1::ReLU1(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ReLU6.cc b/runtime/onert/core/src/ir/operation/ReLU6.cc
deleted file mode 100644
index 5972329af..000000000
--- a/runtime/onert/core/src/ir/operation/ReLU6.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/ReLU6.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void ReLU6::accept(OperationVisitor &v) const { v.visit(*this); }
-
-ReLU6::ReLU6(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Round.cc b/runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc
index 16dfb2b1b..9f17af97c 100644
--- a/runtime/onert/core/src/ir/operation/Round.cc
+++ b/runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ir/operation/Round.h"
+#include "ir/operation/ResizeNearestNeighbor.h"
#include <cassert>
@@ -27,10 +27,12 @@ namespace ir
namespace operation
{
-void Round::accept(OperationVisitor &v) const { v.visit(*this); }
+void ResizeNearestNeighbor::accept(OperationVisitor &v) const { v.visit(*this); }
-Round::Round(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+ResizeNearestNeighbor::ResizeNearestNeighbor(const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/SQRT.cc b/runtime/onert/core/src/ir/operation/SQRT.cc
deleted file mode 100644
index ad887d89a..000000000
--- a/runtime/onert/core/src/ir/operation/SQRT.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/SQRT.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void SQRT::accept(OperationVisitor &v) const { v.visit(*this); }
-
-SQRT::SQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Sub.cc b/runtime/onert/core/src/ir/operation/Sub.cc
deleted file mode 100644
index d71071686..000000000
--- a/runtime/onert/core/src/ir/operation/Sub.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Sub.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Sub::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Sub::Sub(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Tanh.cc b/runtime/onert/core/src/ir/operation/Tanh.cc
deleted file mode 100644
index 8fab0c0f3..000000000
--- a/runtime/onert/core/src/ir/operation/Tanh.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Tanh.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Tanh::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Tanh::Tanh(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ZerosLike.cc b/runtime/onert/core/src/ir/operation/ZerosLike.cc
deleted file mode 100644
index 5f49b98d1..000000000
--- a/runtime/onert/core/src/ir/operation/ZerosLike.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/ZerosLike.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void ZerosLike::accept(OperationVisitor &v) const { v.visit(*this); }
-
-ZerosLike::ZerosLike(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/pass/PermutationOperationPass.h b/runtime/onert/core/src/ir/pass/PermutationOperationPass.h
deleted file mode 100644
index 6dec9ea8f..000000000
--- a/runtime/onert/core/src/ir/pass/PermutationOperationPass.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_GRAPH_PASS_PERMUTATION_OPERATION_PASS_H__
-#define __ONERT_GRAPH_PASS_PERMUTATION_OPERATION_PASS_H__
-
-#include "ir/OperationVisitor.h"
-#include "LoweredOperationPass.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace pass
-{
-
-class PermutationOperationPass : public LoweredOperationPass, public OperationVisitor
-{
-public:
- using LoweredOperationPass::LoweredOperationPass;
-
-public:
- std::string id() final { return "PermutationOperationPass"; }
-
-public:
- void callback(const OperationIndex &i, Operation &n) final;
-
-public:
- void visit(const operation::Add &) final;
- void visit(const operation::Comparison &) final;
- void visit(const operation::Concat &) final;
- void visit(const operation::Div &) final;
- void visit(const operation::LogicalAnd &) final;
- void visit(const operation::LogicalNot &) final;
- void visit(const operation::LogicalOr &) final;
- void visit(const operation::Max &) final;
- void visit(const operation::Min &) final;
- void visit(const operation::Mul &) final;
- void visit(const operation::Pack &) final;
- void visit(const operation::PReLU &) final;
- void visit(const operation::SquaredDifference &) final;
- void visit(const operation::Sub &) final;
- void visit(const operation::Unpack &) final;
- void visit(const operation::FullyConnected &) final;
- void visit(const operation::Gather &) final;
- void visit(const operation::Reshape &) final;
-
-private:
- void applyExpandRanks(const Operation &);
- void changeToKeepLayout(const Operation &);
-};
-
-} // namespace pass
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_GRAPH_PASS_PERMUTATION_OPERATION_PASS_H__
diff --git a/runtime/onert/core/src/util/EventRecorder.cc b/runtime/onert/core/src/util/EventRecorder.cc
index ec7f92117..13a599bed 100644
--- a/runtime/onert/core/src/util/EventRecorder.cc
+++ b/runtime/onert/core/src/util/EventRecorder.cc
@@ -21,7 +21,12 @@
#include <unordered_map>
#include <json/json.h>
#include <assert.h>
+#include <utility>
+#include <map>
+#include <set>
+#include <stdint.h>
+// json type for Chrome Event Trace
namespace
{
@@ -110,6 +115,290 @@ std::string object(const CounterEvent &evt)
} // namespace
+// md table type
+namespace
+{
+
+void writeMDTableRow(std::ostream &os, const std::vector<std::string> &list)
+{
+ os << "| ";
+ for (auto &key : list)
+ {
+ os << key << " | ";
+ }
+ os << "\n";
+}
+
+struct MDContent
+{
+ std::string name;
+ uint64_t begin_ts;
+ uint64_t end_ts;
+ uint32_t min_rss;
+ uint32_t max_rss;
+ uint32_t min_page_reclaims;
+ uint32_t max_page_reclaims;
+
+ MDContent()
+ : begin_ts(0), end_ts(0), min_rss(UINT32_MAX), max_rss(0), min_page_reclaims(UINT32_MAX),
+ max_page_reclaims(0)
+ {
+ // DO NOTHING
+ }
+
+ virtual ~MDContent() = default;
+
+ void updateRss(uint32_t rss)
+ {
+ if (min_rss == UINT32_MAX)
+ min_rss = rss;
+ if (max_rss == 0)
+ max_rss = rss;
+
+ if (min_rss > rss)
+ min_rss = rss;
+ else if (max_rss < rss)
+ max_rss = rss;
+ }
+
+ void updateMinflt(uint32_t minflt)
+ {
+ if (min_page_reclaims == UINT32_MAX)
+ min_page_reclaims = minflt;
+ if (max_page_reclaims == 0)
+ max_page_reclaims = minflt;
+
+ if (min_page_reclaims > minflt)
+ min_page_reclaims = minflt;
+ else if (max_page_reclaims < minflt)
+ max_page_reclaims = minflt;
+ }
+
+ virtual void write(std::ostream &os) const = 0;
+};
+
+struct OpSeq : public MDContent
+{
+ std::string backend;
+ uint64_t graph_latency;
+
+ struct OpSeqCmp
+ {
+ bool operator()(const OpSeq &lhs, const OpSeq &rhs) const
+ {
+ return lhs.begin_ts < rhs.begin_ts;
+ }
+ bool operator()(const OpSeq &lhs, const OpSeq &rhs) { return lhs.begin_ts < rhs.begin_ts; }
+ bool operator()(OpSeq &lhs, OpSeq &rhs) { return lhs.begin_ts < rhs.begin_ts; }
+ };
+
+ void write(std::ostream &os) const override
+ {
+ uint64_t opseq_latency = end_ts - begin_ts;
+ double opseq_per = static_cast<double>(opseq_latency) / graph_latency * 100.0;
+ writeMDTableRow(os, {name, backend, std::to_string(opseq_latency), std::to_string(opseq_per),
+ std::to_string(min_rss), std::to_string(max_rss),
+ std::to_string(min_page_reclaims), std::to_string(max_page_reclaims)});
+ }
+};
+
+struct Graph : public MDContent
+{
+ std::set<OpSeq, OpSeq::OpSeqCmp> opseqs;
+
+ void setOpSeqs(const std::map<std::string, OpSeq> &name_to_opseq)
+ {
+ uint64_t graph_latency = end_ts - begin_ts;
+ for (auto it : name_to_opseq)
+ {
+ auto opseq = it.second;
+ opseq.graph_latency = graph_latency;
+
+ opseqs.insert(opseq);
+
+ updateRss(opseq.min_rss);
+ updateRss(opseq.max_rss);
+ updateMinflt(opseq.min_page_reclaims);
+ updateMinflt(opseq.max_page_reclaims);
+ }
+ }
+
+ void write(std::ostream &os) const override
+ {
+ static std::vector<std::string> graph_headers{"latency(us)", "rss_min(kb)", "rss_max(kb)",
+ "page_reclaims_min", "page_reclaims_max"};
+
+ static std::vector<std::string> graph_headers_line{"-----------", "-------", "-------",
+ "-----------------", "-----------------"};
+
+ // Graph's Header
+ writeMDTableRow(os, graph_headers);
+ writeMDTableRow(os, graph_headers_line);
+
+ // Graph's contents
+ writeMDTableRow(os, {std::to_string(end_ts - begin_ts), std::to_string(min_rss),
+ std::to_string(max_rss), std::to_string(min_page_reclaims),
+ std::to_string(max_page_reclaims)});
+
+ os << "\n";
+
+ static std::vector<std::string> opseq_headers{
+ "OpSeq name", "backend", "latency(us)", "latency(%)",
+ "rss_min(kb)", "rss_max(kb)", "page_reclaims_min", "page_reclaims_max"};
+
+ static std::vector<std::string> opseq_headers_line{
+ "----------", "-------", "-----------", "-----------",
+ "-------", "-------", "-----------------", "-----------------"};
+
+ os << "## OpSequences \n";
+
+ // OpSeq's Header
+ writeMDTableRow(os, opseq_headers);
+ writeMDTableRow(os, opseq_headers_line);
+
+ // OpSeq's contents
+ for (auto opseq : opseqs)
+ {
+ opseq.write(os);
+ }
+
+ os << "\n";
+ }
+};
+
+struct MDTableBuilder
+{
+ MDTableBuilder(const std::vector<DurationEvent> &duration_events,
+ const std::vector<CounterEvent> &counter_events)
+ : _duration_events(duration_events), _counter_events(counter_events)
+ {
+ for (const auto &evt : _counter_events)
+ {
+ uint64_t ts = std::stoull(evt.ts);
+ auto &name = evt.name;
+ assert(name.compare("maxrss") == 0 || name.compare("minflt") == 0);
+ assert(evt.values.size() == 1);
+ auto &val = evt.values.begin()->second;
+ if (_ts_to_values.find(ts) == _ts_to_values.end())
+ {
+ std::pair<uint32_t, uint32_t> values;
+ if (name.compare("maxrss") == 0)
+ values.first = std::stoul(val);
+ else
+ values.second = std::stoul(val);
+ _ts_to_values.insert({ts, values});
+ }
+ else
+ {
+ auto &values = _ts_to_values.at(ts);
+ if (name.compare("maxrss") == 0)
+ values.first = std::stoul(val);
+ else
+ values.second = std::stoul(val);
+ }
+ }
+ }
+
+ MDTableBuilder &build()
+ {
+ for (auto &it : divideGraph())
+ {
+ size_t begin_idx = it.first;
+ size_t end_idx = it.second;
+ std::map<std::string, OpSeq> name_to_opseq;
+ for (size_t i = begin_idx + 1; i < end_idx; ++i)
+ {
+ const auto &evt = _duration_events[i];
+ assert(evt.name.compare("Graph") != 0);
+ assert(evt.ph.compare("B") == 0 || evt.ph.compare("E") == 0);
+ if (evt.ph.compare("B") == 0)
+ {
+ assert(name_to_opseq.find(evt.name) == name_to_opseq.end());
+ name_to_opseq.insert({evt.name, makeOpSeq(evt)});
+ }
+ else
+ {
+ assert(name_to_opseq.find(evt.name) != name_to_opseq.end());
+ auto &opseq = name_to_opseq.at(evt.name);
+ updateOpSeq(opseq, evt);
+ }
+ }
+
+ _graphs.emplace_back(makeGraph(begin_idx, end_idx, name_to_opseq));
+ }
+
+ return *this;
+ }
+
+ std::vector<std::pair<size_t, size_t>> divideGraph()
+ {
+ std::vector<std::pair<size_t, size_t>> graph_idx_list; // pair<begin_idx, end_idx>
+ for (size_t i = 0, begin_idx = 0; i < _duration_events.size(); ++i)
+ {
+ const auto &evt = _duration_events.at(i);
+ if (evt.name.compare("Graph") == 0)
+ {
+ if (evt.ph.compare("B") == 0)
+ begin_idx = i;
+ else
+ graph_idx_list.emplace_back(begin_idx, i);
+ }
+ }
+ return graph_idx_list;
+ }
+
+ OpSeq makeOpSeq(const DurationEvent &evt)
+ {
+ OpSeq opseq;
+ opseq.name = evt.name;
+ opseq.begin_ts = std::stoull(evt.ts);
+ opseq.updateRss(_ts_to_values.at(opseq.begin_ts).first);
+ opseq.updateMinflt(_ts_to_values.at(opseq.begin_ts).second);
+ opseq.backend = evt.tid;
+ return opseq;
+ }
+
+ void updateOpSeq(OpSeq &opseq, const DurationEvent &evt)
+ {
+ opseq.end_ts = std::stoull(evt.ts);
+ opseq.updateRss(_ts_to_values.at(opseq.end_ts).first);
+ opseq.updateMinflt(_ts_to_values.at(opseq.end_ts).second);
+ }
+
+ Graph makeGraph(size_t begin_idx, size_t end_idx,
+ const std::map<std::string, OpSeq> &name_to_opseq)
+ {
+ Graph graph;
+ graph.name = "Graph";
+ graph.begin_ts = std::stoull(_duration_events[begin_idx].ts);
+ graph.updateRss(_ts_to_values.at(graph.begin_ts).first);
+ graph.updateMinflt(_ts_to_values.at(graph.begin_ts).second);
+ graph.end_ts = std::stoull(_duration_events[end_idx].ts);
+ graph.updateRss(_ts_to_values.at(graph.end_ts).first);
+ graph.updateMinflt(_ts_to_values.at(graph.end_ts).second);
+ graph.setOpSeqs(name_to_opseq);
+ return graph;
+ }
+
+ void write(std::ostream &os)
+ {
+ // Write contents
+ for (size_t i = 0; i < _graphs.size(); ++i)
+ {
+ os << "# Graph " << i << "\n";
+ _graphs.at(i).write(os);
+ }
+ }
+
+ const std::vector<DurationEvent> &_duration_events;
+ const std::vector<CounterEvent> &_counter_events;
+ // timestamp to std::pair<maxrss, minflt>
+ std::unordered_map<uint64_t, std::pair<uint32_t, uint32_t>> _ts_to_values;
+ std::vector<Graph> _graphs;
+};
+
+} // namespace
+
void EventRecorder::emit(const DurationEvent &evt)
{
std::lock_guard<std::mutex> lock{_mu};
@@ -136,6 +425,9 @@ void EventRecorder::writeToFile(std::ostream &os)
case WriteFormat::SNPE_BENCHMARK:
writeSNPEBenchmark(os);
break;
+ case WriteFormat::MD_TABLE:
+ writeMDTable(os);
+ break;
default:
assert(!"Invalid value");
break;
@@ -258,3 +550,8 @@ void EventRecorder::writeChromeTrace(std::ostream &os)
os << " ]\n";
os << "}\n";
}
+
+void EventRecorder::writeMDTable(std::ostream &os)
+{
+ MDTableBuilder(_duration_events, _counter_events).build().write(os);
+}
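A small standalone illustration of the markdown layout the new MD_TABLE path emits: a header row, a divider row, then one row per entry, each produced by the writeMDTableRow helper added above. The numeric values below are made up for the example, not measured data.

#include <iostream>
#include <string>
#include <vector>

// Same formatting rule as writeMDTableRow in the patch: "| " then "cell | " per cell.
void writeRow(std::ostream &os, const std::vector<std::string> &cells)
{
  os << "| ";
  for (const auto &c : cells)
    os << c << " | ";
  os << "\n";
}

int main()
{
  writeRow(std::cout, {"latency(us)", "rss_min(kb)", "rss_max(kb)"});
  writeRow(std::cout, {"-----------", "-------", "-------"});
  writeRow(std::cout, {"1234", "5120", "6144"}); // example numbers only
}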
diff --git a/runtime/onert/core/src/util/EventRecorder.h b/runtime/onert/core/src/util/EventRecorder.h
index 6eea06986..37ec1a0f1 100644
--- a/runtime/onert/core/src/util/EventRecorder.h
+++ b/runtime/onert/core/src/util/EventRecorder.h
@@ -53,7 +53,8 @@ public:
enum class WriteFormat
{
CHROME_TRACING,
- SNPE_BENCHMARK
+ SNPE_BENCHMARK,
+ MD_TABLE,
};
public:
@@ -71,6 +72,7 @@ public:
private:
void writeSNPEBenchmark(std::ostream &os);
void writeChromeTrace(std::ostream &os);
+ void writeMDTable(std::ostream &os);
private:
std::mutex _mu;
diff --git a/runtime/onert/core/src/util/ShapeInference.cc b/runtime/onert/core/src/util/ShapeInference.cc
index 9a24f8c1a..95c15049d 100644
--- a/runtime/onert/core/src/util/ShapeInference.cc
+++ b/runtime/onert/core/src/util/ShapeInference.cc
@@ -18,8 +18,6 @@
#include "util/Utils.h"
#include "ir/InternalType.h"
#include "ir/Shape.h"
-#include "ir/operation/AvgPool2D.h"
-#include "ir/operation/MaxPool2D.h"
#include "util/ShapeInference.h"
#include "util/logging.h"
@@ -81,10 +79,12 @@ ir::Shape broadcastShapes(const ir::Shape &lhs_shape, const ir::Shape &rhs_shape
// Calculate output height and width of convolution-like operation
std::pair<int, int> calcConvLikeHeightAndWidth(const int in_h, const int in_w, const int ker_h,
const int ker_w, const ir::Padding pad,
- const ir::Stride stride)
+ const ir::Stride stride,
+ const ir::Dilation dilation = {1, 1})
{
int32_t out_h = 0, out_w = 0;
-
+ int32_t effective_filter_w_size = (ker_w - 1) * dilation.width_factor + 1;
+ int32_t effective_filter_h_size = (ker_h - 1) * dilation.height_factor + 1;
switch (pad.type)
{
case ir::PaddingType::SAME:
@@ -92,12 +92,15 @@ std::pair<int, int> calcConvLikeHeightAndWidth(const int in_h, const int in_w, c
out_w = ceil_div(in_w, stride.horizontal);
break;
case ir::PaddingType::VALID:
- out_h = ceil_div(in_h - ker_h + 1, stride.vertical);
- out_w = ceil_div(in_w - ker_w + 1, stride.horizontal);
+ out_h = ceil_div(in_h - effective_filter_h_size + 1, stride.vertical);
+ out_w = ceil_div(in_w - effective_filter_w_size + 1, stride.horizontal);
break;
case ir::PaddingType::EXPLICIT:
- out_h = (in_h + pad.param.top + pad.param.bottom - ker_h) / stride.vertical + 1;
- out_w = (in_w + pad.param.left + pad.param.right - ker_w) / stride.horizontal + 1;
+ out_h =
+ (in_h + pad.param.top + pad.param.bottom - effective_filter_h_size) / stride.vertical + 1;
+ out_w =
+ (in_w + pad.param.left + pad.param.right - effective_filter_w_size) / stride.horizontal +
+ 1;
break;
default:
assert(false);
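A worked example of the new dilation handling: a 3-wide kernel with dilation factor 2 has an effective width of (3 - 1) * 2 + 1 = 5, so a VALID convolution over a 10-wide input at stride 1 yields ceil((10 - 5 + 1) / 1) = 6 output columns. A minimal standalone check of that arithmetic:

#include <cassert>

int ceil_div(int a, int b) { return (a + b - 1) / b; }

int main()
{
  const int in_w = 10, ker_w = 3, dilation_w = 2, stride_w = 1;
  const int effective_ker_w = (ker_w - 1) * dilation_w + 1; // 5, as in the patch
  // VALID padding, matching the updated calcConvLikeHeightAndWidth
  const int out_w_valid = ceil_div(in_w - effective_ker_w + 1, stride_w);
  assert(out_w_valid == 6);
  // EXPLICIT padding with 1 pixel on each side
  const int out_w_explicit = (in_w + 1 + 1 - effective_ker_w) / stride_w + 1;
  assert(out_w_explicit == 8);
  return 0;
}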
@@ -126,17 +129,6 @@ ir::Shape inferArgMaxShape(const ir::Shape &input_shape, int axis, int rank)
return out_shape;
}
-ir::Shape inferAvgPoolShape(const ir::Shape &in_shape, const ir::operation::AvgPool2D::Param &param,
- const ir::Layout layout)
-{
- assert(layout == ir::Layout::NHWC);
- auto ifm_shape = in_shape.asFeature(layout);
- const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, param.kh, param.kw,
- param.padding, param.stride);
- // Pooling don't change number of channels and batch size
- return ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, ifm_shape.C};
-}
-
ir::Shape inferReduceShape(const ir::Shape &input_shape, const std::vector<int> &axes,
bool keep_dims)
{
@@ -320,7 +312,7 @@ ir::Shape inferConv2DShape(const ir::Shape &in_shape, const ir::Shape &ker_shape
assert(ifm_shape.C == kf_shape.C);
const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, kf_shape.H, kf_shape.W,
- param.padding, param.stride);
+ param.padding, param.stride, param.dilation);
return ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, kf_shape.N};
}
@@ -411,17 +403,6 @@ ir::Shape inferGatherShape(const ir::Shape &input_shape, const ir::Shape &indice
return out_shape;
}
-ir::Shape inferMaxPoolShape(const ir::Shape &in_shape, const ir::operation::MaxPool2D::Param &param,
- const ir::Layout layout)
-{
- assert(layout == ir::Layout::NHWC);
- auto ifm_shape = in_shape.asFeature(layout);
- const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, param.kh, param.kw,
- param.padding, param.stride);
- // Pooling don't change number of channels and batch size
- return ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, ifm_shape.C};
-}
-
ir::Shape inferOnehotShape(const ir::Shape &input_shape, const int depth, int axis)
{
assert(depth >= 0);
@@ -486,6 +467,17 @@ ir::Shape inferPadShape(const ir::Shape &in_shape, const int32_t *pad_buf, const
return ret;
}
+ir::Shape inferPoolShape(const ir::Shape &in_shape, const ir::operation::Pool2D::Param &param,
+ const ir::Layout layout)
+{
+ assert(layout == ir::Layout::NHWC);
+ auto ifm_shape = in_shape.asFeature(layout);
+ const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, param.kh, param.kw,
+ param.padding, param.stride);
+ // Pooling doesn't change number of channels and batch size
+ return ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, ifm_shape.C};
+}
+
ir::Shape inferResizeBilinearShape(const ir::Shape &in_shape, const int32_t output_height,
const int32_t output_width)
{
diff --git a/runtime/onert/frontend/base_loader/include/base_loader.h b/runtime/onert/frontend/base_loader/include/base_loader.h
index 0f6a2a5d0..480452e01 100644
--- a/runtime/onert/frontend/base_loader/include/base_loader.h
+++ b/runtime/onert/frontend/base_loader/include/base_loader.h
@@ -105,40 +105,39 @@ protected:
template <typename Param, typename OptionsType>
void loadStridesAndPaddings(Param &param, const OptionsType *options);
// Load Pool2D param
- template <typename Param> void loadPool2D(Param &param, const Pool2DOptions *options);
+ template <typename Param> void loadPool2DOptions(Param &param, const Pool2DOptions *options);
// Operations
void loadConv2D(const Operator *op, ir::Graph &subg);
void loadDepthwiseConv2D(const Operator *op, ir::Graph &subg);
void loadTransposeConv(const Operator *op, ir::Graph &subg);
- void loadAvgPool2D(const Operator *op, ir::Graph &subg);
+ void loadPool2D(const Operator *op, ir::Graph &subg, ir::operation::Pool2D::PoolType op_type);
void loadReshape(const Operator *op, ir::Graph &subg);
void loadSoftmax(const Operator *op, ir::Graph &subg);
- void loadMaxPool2D(const Operator *op, ir::Graph &subg);
void loadConcatenation(const Operator *op, ir::Graph &subg);
void loadFill(const Operator *op, ir::Graph &subg);
void loadFC(const Operator *op, ir::Graph &subg);
- void loadAdd(const Operator *op, ir::Graph &subg);
- void loadSub(const Operator *op, ir::Graph &subg);
- void loadMul(const Operator *op, ir::Graph &subg);
- void loadDiv(const Operator *op, ir::Graph &subg);
+ template <ir::operation::BinaryArithmetic::ArithmeticType op_type>
+ void loadBinaryArithmetic(const Operator *op, ir::Graph &subg);
+ void loadAddV2(const Operator *op, ir::Graph &subg);
void loadPack(const Operator *op, ir::Graph &subg);
- void loadRelu(const Operator *op, ir::Graph &subg);
- void loadRelu6(const Operator *op, ir::Graph &subg);
void loadResizeBilinear(const Operator *op, ir::Graph &subg);
- void loadRsqrt(const Operator *op, ir::Graph &subg);
+ void loadResizeNearestNeighbor(const Operator *op, ir::Graph &subg);
void loadSelect(const Operator *op, ir::Graph &subg);
- void loadSqrt(const Operator *op, ir::Graph &subg);
void loadSquaredDifference(const Operator *op, ir::Graph &subg);
- void loadTanh(const Operator *op, ir::Graph &subg);
void loadTranspose(const Operator *op, ir::Graph &subg);
- void loadReduce(const Operator *op, ir::Graph &subg,
- ir::operation::Reduce::ReduceType reduce_type);
+ template <ir::operation::Reduce::ReduceType reduce_type>
+ void loadReduce(const Operator *op, ir::Graph &subg);
void loadReduceAll(const Operator *op, ir::Graph &subg);
void loadReverseV2(const Operator *op, ir::Graph &subg);
void loadPad(const Operator *op, ir::Graph &subg);
- void loadLogistic(const Operator *op, ir::Graph &subg);
- void loadExp(const Operator *op, ir::Graph &subg);
+ void loadElementwiseActivation(const Operator *op, ir::Graph &subg,
+ ir::operation::ElementwiseActivation::Type op_type,
+ float alpha = 0.f, float beta = 0.f);
+ template <ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type>
+ void loadElementwiseBinary(const Operator *op, ir::Graph &subg);
+ void loadElementwiseUnary(const Operator *op, ir::Graph &subg,
+ ir::operation::ElementwiseUnary::Type op_type);
void loadExpandDims(const Operator *op, ir::Graph &subg);
void loadGather(const Operator *op, ir::Graph &subg);
void loadCustom(const Operator *op, ir::Graph &subg);
@@ -152,35 +151,25 @@ protected:
void loadSlice(const Operator *op, ir::Graph &subg);
void loadStridedSlice(const Operator *op, ir::Graph &subg);
void loadUnpack(const Operator *op, ir::Graph &subg);
- void loadMinimum(const Operator *op, ir::Graph &subg);
- void loadMaximum(const Operator *op, ir::Graph &subg);
- void loadCast(const Operator *op, ir::Graph &subg);
void loadComparison(const Operator *op, ir::Graph &subg);
void loadEinsum(const Operator *op, ir::Graph &subg);
void loadOneHot(const Operator *op, ir::Graph &subg);
- void loadAbs(const Operator *op, ir::Graph &subg);
- void loadCos(const Operator *op, ir::Graph &subg);
- void loadSin(const Operator *op, ir::Graph &subg);
void loadShape(const Operator *op, ir::Graph &subg);
void loadIf(const Operator *op, ir::Graph &subg);
void loadWhile(const Operator *op, ir::Graph &subg);
- void loadNeg(const Operator *op, ir::Graph &subg);
- void loadLog(const Operator *op, ir::Graph &subg);
void loadArgMax(const Operator *op, ir::Graph &subg);
- void loadRound(const Operator *op, ir::Graph &subg);
void loadPow(const Operator *op, ir::Graph &subg);
- void loadLogicalNot(const Operator *op, ir::Graph &subg);
- void loadZerosLike(const Operator *op, ir::Graph &subg);
void loadTile(const Operator *op, ir::Graph &subg);
- void loadLogicalOr(const Operator *op, ir::Graph &subg);
void loadRange(const Operator *op, ir::Graph &subg);
+ void loadRank(const Operator *op, ir::Graph &subg);
void loadMatrixBandPart(const Operator *op, ir::Graph &subg);
void loadBroadcastTo(const Operator *op, ir::Graph &subg);
void loadFusedBatchNorm(const Operator *op, ir::Graph &subg);
void loadLogSoftmax(const Operator *op, ir::Graph &subg);
- void loadQuantize(const Operator *op, ir::Graph &subg);
void loadSpaceToDepth(const Operator *op, ir::Graph &subg);
void loadStatelessRandomUniform(const Operator *op, ir::Graph &subg);
+ void loadL2Normalization(const Operator *op, ir::Graph &subg);
+ void loadLeakyRelu(const Operator *op, ir::Graph &subg);
protected:
// Base address for mapped region for loading (if needed)
@@ -194,6 +183,7 @@ protected:
const Model *_model;
// Maps Tensor indices to onert Operands.
std::vector<ir::OperandIndex> _tensor_to_operand;
+ std::unordered_map<ir::OperandIndex, std::string> _tensor_names;
// Verifier
std::unique_ptr<Verifier> _verifier;
};
@@ -466,8 +456,8 @@ ir::OperandIndex BaseLoader<LoaderDomain, SpecificLoader>::loadOperand(const Ten
subg.setOperandValue(operand_index, std::move(data_obj));
}
- // Name unused
- // auto name = tensor->name();
+ _tensor_names.emplace(operand_index, tensor->name()->str());
+
// Variablie
if (tensor->is_variable())
throw std::runtime_error("Variable tensor not supported!");
@@ -518,8 +508,8 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadStridesAndPaddings(Param &par
template <typename LoaderDomain, typename SpecificLoader>
template <typename Param>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadPool2D(Param &param,
- const Pool2DOptions *options)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadPool2DOptions(Param &param,
+ const Pool2DOptions *options)
{
// Strides and Paddings
loadStridesAndPaddings(param, options);
@@ -543,7 +533,10 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadConv2D(const Operator *op, ir
const auto *options = op->builtin_options_as_Conv2DOptions();
param.activation = convertActivation(options->fused_activation_function());
loadStridesAndPaddings(param, options);
- // Dilation h/w factor unused
+
+ param.dilation.width_factor = options->dilation_w_factor();
+ param.dilation.height_factor = options->dilation_h_factor();
+
std::unique_ptr<ir::Operation> new_op(new ir::operation::Conv2D(inputs, outputs, param));
subg.addOperation(std::move(new_op));
}
@@ -585,19 +578,21 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadTransposeConv(const Operator
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadAvgPool2D(const Operator *op, ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadPool2D(const Operator *op, ir::Graph &subg,
+ ir::operation::Pool2D::PoolType op_type)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
- ir::operation::AvgPool2D::Param param;
+ ir::operation::Pool2D::Param param;
+ param.op_type = op_type;
const auto *options = op->builtin_options_as_Pool2DOptions();
- loadPool2D(param, options);
+ loadPool2DOptions(param, options);
- std::unique_ptr<ir::Operation> new_op(new ir::operation::AvgPool2D(inputs, outputs, param));
+ std::unique_ptr<ir::Operation> new_op(new ir::operation::Pool2D(inputs, outputs, param));
subg.addOperation(std::move(new_op));
}
@@ -645,23 +640,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadSoftmax(const Operator *op, i
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadMaxPool2D(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::MaxPool2D::Param param;
- const auto *options = op->builtin_options_as_Pool2DOptions();
-
- loadPool2D(param, options);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::MaxPool2D(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadConcatenation(const Operator *op,
ir::Graph &subg)
{
@@ -719,70 +697,82 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadFC(const Operator *op, ir::Gr
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadAdd(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::Add::Param param;
- const auto *options = op->builtin_options_as_AddOptions();
-
- param.activation = convertActivation(options->fused_activation_function());
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Add(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSub(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::Sub::Param param;
- const auto *options = op->builtin_options_as_SubOptions();
-
- param.activation = convertActivation(options->fused_activation_function());
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Sub(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadMul(const Operator *op, ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadAddV2(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
- ir::operation::Mul::Param param;
- const auto *options = op->builtin_options_as_MulOptions();
+ ir::operation::BinaryArithmetic::Param param;
+ param.arithmetic_type = ir::operation::BinaryArithmetic::ArithmeticType::ADD;
- param.activation = convertActivation(options->fused_activation_function());
+ if (op->custom_options() == nullptr)
+ {
+ param.activation = ir::Activation::NONE;
+ }
+ else
+ {
+ size_t custom_op_data_size = op->custom_options()->size();
+ auto custom_op_data = op->custom_options()->Data();
+ auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size);
+ auto attr_map = data_root.AsMap();
+ const auto fused_activation_func = static_cast<typename LoaderDomain::ActivationFunctionType>(
+ attr_map["fused_activation_function"].AsInt8());
+ param.activation = convertActivation(fused_activation_func);
+ }
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Mul(inputs, outputs, param));
+ std::unique_ptr<ir::Operation> new_op(
+ new ir::operation::BinaryArithmetic(inputs, outputs, param));
subg.addOperation(std::move(new_op));
}
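The AddV2 loader above reads its fused activation from the operator's flexbuffer custom options instead of builtin AddOptions. A standalone sketch of that round trip using the same flexbuffers calls; the attribute name comes from the patch, and the activation value 1 is an arbitrary example:

#include <flatbuffers/flexbuffers.h>
#include <iostream>

int main()
{
  // Encode custom options the way a converter would.
  flexbuffers::Builder fbb;
  fbb.Map([&]() { fbb.Int("fused_activation_function", 1); });
  fbb.Finish();

  // Decode them the way loadAddV2 does.
  const auto root = flexbuffers::GetRoot(fbb.GetBuffer());
  const auto attr_map = root.AsMap();
  std::cout << "fused_activation_function = "
            << static_cast<int>(attr_map["fused_activation_function"].AsInt8()) << "\n";
  return 0;
}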
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadDiv(const Operator *op, ir::Graph &subg)
+template <ir::operation::BinaryArithmetic::ArithmeticType op_type>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadBinaryArithmetic(const Operator *op,
+ ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
- ir::operation::Div::Param param;
- const auto *options = op->builtin_options_as_DivOptions();
-
- param.activation = convertActivation(options->fused_activation_function());
+ ir::operation::BinaryArithmetic::Param param;
+ param.arithmetic_type = op_type;
+ switch (op_type)
+ {
+ case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
+ {
+ const auto *add_options = op->builtin_options_as_AddOptions();
+ param.activation = convertActivation(add_options->fused_activation_function());
+ break;
+ }
+ case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
+ {
+ const auto *sub_options = op->builtin_options_as_SubOptions();
+ param.activation = convertActivation(sub_options->fused_activation_function());
+ break;
+ }
+ case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
+ {
+ const auto *mul_options = op->builtin_options_as_MulOptions();
+ param.activation = convertActivation(mul_options->fused_activation_function());
+ break;
+ }
+ case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
+ {
+ const auto *div_options = op->builtin_options_as_DivOptions();
+ param.activation = convertActivation(div_options->fused_activation_function());
+ break;
+ }
+ default:
+ assert(false &&
+ "The function 'loadBinaryArithmetic' supports only BinaryArithmetic operations");
+ break;
+ }
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Div(inputs, outputs, param));
+ std::unique_ptr<ir::Operation> new_op(
+ new ir::operation::BinaryArithmetic(inputs, outputs, param));
subg.addOperation(std::move(new_op));
}
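The four per-operator loaders collapse into one function template; the arithmetic type is a non-type template parameter, so each instantiation (ADD, SUB, MUL, DIV) shares the body while the switch is resolved against a compile-time constant. A standalone analogue of the pattern (not the onert types themselves):

#include <iostream>

enum class ArithmeticType { ADD, SUB, MUL, DIV };

template <ArithmeticType op_type> int apply(int lhs, int rhs)
{
  switch (op_type) // op_type is a compile-time constant per instantiation
  {
    case ArithmeticType::ADD: return lhs + rhs;
    case ArithmeticType::SUB: return lhs - rhs;
    case ArithmeticType::MUL: return lhs * rhs;
    case ArithmeticType::DIV: return lhs / rhs;
  }
  return 0;
}

int main()
{
  std::cout << apply<ArithmeticType::ADD>(6, 3) << "\n"; // 9
  std::cout << apply<ArithmeticType::DIV>(6, 3) << "\n"; // 2
}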
@@ -805,26 +795,22 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadPack(const Operator *op, ir::
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadRelu(const Operator *op, ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadElementwiseActivation(
+ const Operator *op, ir::Graph &subg, ir::operation::ElementwiseActivation::Type op_type,
+ float alpha, float beta)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
- std::unique_ptr<ir::Operation> new_op(new ir::operation::ReLU(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
+ ir::operation::ElementwiseActivation::Param param;
+ param.op_type = op_type;
+ param.alpha = alpha;
+ param.beta = beta;
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadRelu6(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::ReLU6(inputs, outputs));
+ std::unique_ptr<ir::Operation> new_op(
+ new ir::operation::ElementwiseActivation(inputs, outputs, param));
subg.addOperation(std::move(new_op));
}
@@ -856,38 +842,40 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadResizeBilinear(const Operator
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadRsqrt(const Operator *op, ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadResizeNearestNeighbor(const Operator *op,
+ ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
+ auto input = inputs.at(0);
+ auto size = inputs.at(1);
- std::unique_ptr<ir::Operation> new_op(new ir::operation::RSQRT(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
+ if (!subg.operands().at(size).isConstant())
+ throw std::runtime_error("ResizeNearestNeighbor: non-constant 'size' is not supported.");
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSelect(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
+ std::vector<std::int32_t> size_v = subg.operands().at(size).template asVector<std::int32_t>();
- loadOperationIO(op, inputs, outputs);
+ ir::operation::ResizeNearestNeighbor::Param param;
+ param.height_out = size_v[0];
+ param.width_out = size_v[1];
+ param.align_corners = op->builtin_options_as_ResizeNearestNeighborOptions()->align_corners();
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Select(inputs, outputs));
+ std::unique_ptr<ir::Operation> new_op(
+ new ir::operation::ResizeNearestNeighbor({input}, outputs, param));
subg.addOperation(std::move(new_op));
}
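The new ResizeNearestNeighbor loader rejects a non-constant size operand because the output height and width must be known when the Param is built. A standalone sketch of the same extraction, assuming the size tensor holds {height, width} as two int32 values (that layout is taken from the patch above):

#include <cassert>
#include <cstdint>
#include <stdexcept>
#include <vector>

struct ResizeParam { std::int32_t height_out; std::int32_t width_out; bool align_corners; };

ResizeParam makeParam(const std::vector<std::int32_t> *size_v, bool align_corners)
{
  if (size_v == nullptr) // stand-in for "the size operand is not constant"
    throw std::runtime_error("ResizeNearestNeighbor: non-constant 'size' is not supported.");
  return ResizeParam{(*size_v)[0], (*size_v)[1], align_corners};
}

int main()
{
  const std::vector<std::int32_t> size{224, 224};
  const auto p = makeParam(&size, false);
  assert(p.height_out == 224 && p.width_out == 224);
  return 0;
}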
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSqrt(const Operator *op, ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadSelect(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
- std::unique_ptr<ir::Operation> new_op(new ir::operation::SQRT(inputs, outputs));
+ std::unique_ptr<ir::Operation> new_op(new ir::operation::Select(inputs, outputs));
subg.addOperation(std::move(new_op));
}
@@ -905,18 +893,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadSquaredDifference(const Opera
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadTanh(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Tanh(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadTranspose(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
@@ -937,8 +913,8 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadTranspose(const Operator *op,
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadReduce(
- const Operator *op, ir::Graph &subg, ir::operation::Reduce::ReduceType reduce_type)
+template <ir::operation::Reduce::ReduceType reduce_type>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadReduce(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
@@ -1005,26 +981,49 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadPad(const Operator *op, ir::G
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadLogistic(const Operator *op, ir::Graph &subg)
+template <ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadElementwiseBinary(const Operator *op,
+ ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Logistic(inputs, outputs));
+ ir::operation::ElementwiseBinary::Param param;
+ param.op_type = op_type;
+
+ std::unique_ptr<ir::Operation> new_op(
+ new ir::operation::ElementwiseBinary(inputs, outputs, param));
subg.addOperation(std::move(new_op));
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadExp(const Operator *op, ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadElementwiseUnary(
+ const Operator *op, ir::Graph &subg, ir::operation::ElementwiseUnary::Type op_type)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Exp(inputs, outputs));
+ ir::operation::ElementwiseUnary::Param param;
+ param.op_type = op_type;
+
+ if (op_type == ir::operation::ElementwiseUnary::Type::CAST)
+ {
+ auto qasymm8ToUint8 = [](ir::Operand &operand) {
+ if (operand.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM)
+ {
+ operand.type(ir::DataType::UINT8);
+ }
+ };
+ qasymm8ToUint8(subg.operands().at(inputs.at(ir::operation::ElementwiseUnary::Input::INPUT)));
+ qasymm8ToUint8(subg.operands().at(outputs.at(0)));
+ }
+
+ std::unique_ptr<ir::Operation> new_op(
+ new ir::operation::ElementwiseUnary(inputs, outputs, param));
subg.addOperation(std::move(new_op));
}
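The CAST special case above rewrites quantized-asymmetric uint8 operands to plain UINT8 before the ElementwiseUnary node is built. A standalone analogue of that in-place retyping; the enum values are stand-ins, not the full onert DataType list:

#include <cassert>

enum class DataType { FLOAT32, UINT8, QUANT_UINT8_ASYMM };

struct Operand { DataType type; };

int main()
{
  auto qasymm8ToUint8 = [](Operand &operand) {
    if (operand.type == DataType::QUANT_UINT8_ASYMM)
      operand.type = DataType::UINT8;
  };

  Operand input{DataType::QUANT_UINT8_ASYMM}, output{DataType::FLOAT32};
  qasymm8ToUint8(input);  // quantized operand is retyped
  qasymm8ToUint8(output); // non-quantized operands are left alone
  assert(input.type == DataType::UINT8 && output.type == DataType::FLOAT32);
  return 0;
}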
@@ -1177,6 +1176,17 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadStatelessRandomUniform(const
}
template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadRank(const Operator *op, ir::Graph &subg)
+{
+ ir::OperandIndexSequence inputs;
+ ir::OperandIndexSequence outputs;
+ loadOperationIO(op, inputs, outputs);
+
+ std::unique_ptr<ir::Operation> new_op(new ir::operation::Rank(inputs, outputs));
+ subg.addOperation(std::move(new_op));
+}
+
+template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
@@ -1197,7 +1207,8 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir
Einsum,
BroadcastTo,
FusedBatchNorm,
- StatelessRandomUniform
+ StatelessRandomUniform,
+ Erf
};
// Mapping from custom op name string to BuiltinOP enum
@@ -1210,6 +1221,7 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir
{"FusedBatchNormV3", BuiltinOP::FusedBatchNorm},
{"BroadcastTo", BuiltinOP::BroadcastTo},
{"StatelessRandomUniform", BuiltinOP::StatelessRandomUniform},
+ {"Erf", BuiltinOP::Erf},
};
try
@@ -1219,7 +1231,7 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir
switch (custom_op_id)
{
case BuiltinOP::AddV2:
- loadAdd(op, subg);
+ loadAddV2(op, subg);
break;
case BuiltinOP::ReduceAll:
loadReduceAll(op, subg);
@@ -1242,6 +1254,9 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir
case BuiltinOP::StatelessRandomUniform:
loadStatelessRandomUniform(op, subg);
break;
+ case BuiltinOP::Erf:
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::ERF);
+ break;
default:
throw std::runtime_error{
"Loader: Custom OP map is defined but operation loader function is not defined"};
@@ -1396,51 +1411,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadUnpack(const Operator *op, ir
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadMinimum(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Min(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadMaximum(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Max(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadCast(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- auto qasymm8ToUint8 = [](ir::Operand &operand) {
- if (operand.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM)
- {
- operand.type(ir::DataType::UINT8);
- }
- };
- qasymm8ToUint8(subg.operands().at(inputs.at(ir::operation::Cast::Input::INPUT)));
- qasymm8ToUint8(subg.operands().at(outputs.at(0)));
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Cast(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadComparison(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
@@ -1562,42 +1532,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOneHot(const Operator *op, ir
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadAbs(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Abs(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadCos(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Cos(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSin(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Sin(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadShape(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
@@ -1652,18 +1586,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadWhile(const Operator *op, ir:
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadNeg(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Neg(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadArgMax(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
@@ -1697,30 +1619,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadArgMax(const Operator *op, ir
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadLog(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Log(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadRound(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Round(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadPow(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
@@ -1733,31 +1631,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadPow(const Operator *op, ir::G
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadLogicalNot(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::LogicalNot(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadZerosLike(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::ZerosLike(inputs, outputs));
-
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadRange(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
@@ -1787,18 +1660,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadTile(const Operator *op, ir::
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadLogicalOr(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::LogicalOr(inputs, outputs));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadLogSoftmax(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
@@ -1817,18 +1678,27 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadLogSoftmax(const Operator *op
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadQuantize(const Operator *op, ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadL2Normalization(const Operator *op,
+ ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
- std::unique_ptr<ir::Operation> new_op(new ir::operation::Quantize(inputs, outputs));
+ std::unique_ptr<ir::Operation> new_op(new ir::operation::L2Normalization(inputs, outputs));
subg.addOperation(std::move(new_op));
}
template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadLeakyRelu(const Operator *op, ir::Graph &subg)
+{
+ float alpha = op->builtin_options_as_LeakyReluOptions()->alpha();
+ loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::LEAKY_RELU, alpha,
+ 1.f);
+}
+
+template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op, ir::Graph &subg)
{
const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
@@ -1839,7 +1709,7 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
loadConv2D(op, subg);
return;
case BuiltinOperator::BuiltinOperator_AVERAGE_POOL_2D:
- loadAvgPool2D(op, subg);
+ loadPool2D(op, subg, ir::operation::Pool2D::PoolType::AVG);
return;
case BuiltinOperator::BuiltinOperator_DEPTHWISE_CONV_2D:
loadDepthwiseConv2D(op, subg);
@@ -1854,7 +1724,7 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
loadSoftmax(op, subg);
return;
case BuiltinOperator::BuiltinOperator_MAX_POOL_2D:
- loadMaxPool2D(op, subg);
+ loadPool2D(op, subg, ir::operation::Pool2D::PoolType::MAX);
return;
case BuiltinOperator::BuiltinOperator_CONCATENATION:
loadConcatenation(op, subg);
@@ -1863,31 +1733,40 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
loadFC(op, subg);
return;
case BuiltinOperator::BuiltinOperator_ADD:
- loadAdd(op, subg);
+ loadBinaryArithmetic<ir::operation::BinaryArithmetic::ArithmeticType::ADD>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_SUB:
- loadSub(op, subg);
+ loadBinaryArithmetic<ir::operation::BinaryArithmetic::ArithmeticType::SUB>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_MUL:
- loadMul(op, subg);
+ loadBinaryArithmetic<ir::operation::BinaryArithmetic::ArithmeticType::MUL>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_DIV:
- loadDiv(op, subg);
+ loadBinaryArithmetic<ir::operation::BinaryArithmetic::ArithmeticType::DIV>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_PACK:
loadPack(op, subg);
return;
case BuiltinOperator::BuiltinOperator_RELU:
- loadRelu(op, subg);
+ loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::RELU,
+ ir::operation::ElementwiseActivation::infinity, 0.f);
+ return;
+ case BuiltinOperator::BuiltinOperator_RELU_N1_TO_1:
+ loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::RELU, 1.f,
+ -1.f);
return;
case BuiltinOperator::BuiltinOperator_RELU6:
- loadRelu6(op, subg);
+ loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::RELU, 6.f,
+ 0.f);
return;
case BuiltinOperator::BuiltinOperator_RESIZE_BILINEAR:
loadResizeBilinear(op, subg);
return;
+ case BuiltinOperator::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR:
+ loadResizeNearestNeighbor(op, subg);
+ return;
case BuiltinOperator::BuiltinOperator_RSQRT:
- loadRsqrt(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::RSQRT);
return;
case BuiltinOperator::BuiltinOperator_SELECT:
loadSelect(op, subg);
@@ -1897,37 +1776,39 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
loadSelect(op, subg);
return;
case BuiltinOperator::BuiltinOperator_SQRT:
- loadSqrt(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::SQRT);
return;
case BuiltinOperator::BuiltinOperator_SQUARED_DIFFERENCE:
loadSquaredDifference(op, subg);
return;
case BuiltinOperator::BuiltinOperator_TANH:
- loadTanh(op, subg);
+ loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::TANH, 1.f,
+ 1.f);
return;
case BuiltinOperator::BuiltinOperator_TRANSPOSE:
loadTranspose(op, subg);
return;
case BuiltinOperator::BuiltinOperator_MEAN:
- loadReduce(op, subg, ir::operation::Reduce::ReduceType::MEAN);
+ loadReduce<ir::operation::Reduce::ReduceType::MEAN>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_REDUCE_ANY:
- loadReduce(op, subg, ir::operation::Reduce::ReduceType::ANY);
+ loadReduce<ir::operation::Reduce::ReduceType::ANY>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_REDUCE_MAX:
- loadReduce(op, subg, ir::operation::Reduce::ReduceType::MAX);
+ loadReduce<ir::operation::Reduce::ReduceType::MAX>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_REVERSE_V2:
loadReverseV2(op, subg);
return;
case BuiltinOperator::BuiltinOperator_PAD:
+ case BuiltinOperator::BuiltinOperator_PADV2:
loadPad(op, subg);
return;
case BuiltinOperator::BuiltinOperator_LOGISTIC:
- loadLogistic(op, subg);
+ loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::LOGISTIC);
return;
case BuiltinOperator::BuiltinOperator_EXP:
- loadExp(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::EXP);
return;
case BuiltinOperator::BuiltinOperator_EXPAND_DIMS:
loadExpandDims(op, subg);
@@ -1942,7 +1823,7 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
loadBatchToSpaceND(op, subg);
return;
case BuiltinOperator::BuiltinOperator_SUM:
- loadReduce(op, subg, ir::operation::Reduce::ReduceType::SUM);
+ loadReduce<ir::operation::Reduce::ReduceType::SUM>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_CUSTOM:
loadCustom(op, subg);
@@ -1969,13 +1850,13 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
loadUnpack(op, subg);
return;
case BuiltinOperator::BuiltinOperator_MINIMUM:
- loadMinimum(op, subg);
+ loadElementwiseBinary<ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_MAXIMUM:
- loadMaximum(op, subg);
+ loadElementwiseBinary<ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_CAST:
- loadCast(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::CAST);
return;
case BuiltinOperator::BuiltinOperator_EQUAL:
case BuiltinOperator::BuiltinOperator_NOT_EQUAL:
@@ -1989,19 +1870,19 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
loadOneHot(op, subg);
return;
case BuiltinOperator::BuiltinOperator_ABS:
- loadAbs(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::ABS);
return;
case BuiltinOperator::BuiltinOperator_COS:
- loadCos(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::COS);
return;
case BuiltinOperator::BuiltinOperator_SIN:
- loadSin(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::SIN);
return;
case BuiltinOperator::BuiltinOperator_SHAPE:
loadShape(op, subg);
return;
case BuiltinOperator::BuiltinOperator_REDUCE_PROD:
- loadReduce(op, subg, ir::operation::Reduce::ReduceType::PROD);
+ loadReduce<ir::operation::Reduce::ReduceType::PROD>(op, subg);
return;
case BuiltinOperator::BuiltinOperator_IF:
loadIf(op, subg);
@@ -2010,31 +1891,32 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
loadWhile(op, subg);
return;
case BuiltinOperator::BuiltinOperator_NEG:
- loadNeg(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::NEG);
return;
case BuiltinOperator::BuiltinOperator_ARG_MAX:
loadArgMax(op, subg);
return;
case BuiltinOperator::BuiltinOperator_LOG:
- loadLog(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::LOG);
return;
case BuiltinOperator::BuiltinOperator_ROUND:
- loadRound(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::ROUND);
return;
case BuiltinOperator::BuiltinOperator_POW:
loadPow(op, subg);
return;
case BuiltinOperator::BuiltinOperator_LOGICAL_NOT:
- loadLogicalNot(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::LOGICAL_NOT);
return;
case BuiltinOperator::BuiltinOperator_LOGICAL_OR:
- loadLogicalOr(op, subg);
+ loadElementwiseBinary<ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR>(
+ op, subg);
return;
case BuiltinOperator::BuiltinOperator_FILL:
loadFill(op, subg);
return;
case BuiltinOperator::BuiltinOperator_ZEROS_LIKE:
- loadZerosLike(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::ZEROS_LIKE);
return;
case BuiltinOperator::BuiltinOperator_TILE:
loadTile(op, subg);
@@ -2049,11 +1931,20 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
loadLogSoftmax(op, subg);
return;
case BuiltinOperator::BuiltinOperator_QUANTIZE:
- loadQuantize(op, subg);
+ loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::QUANTIZE);
return;
case BuiltinOperator::BuiltinOperator_SPACE_TO_DEPTH:
loadSpaceToDepth(op, subg);
return;
+ case BuiltinOperator::BuiltinOperator_L2_NORMALIZATION:
+ loadL2Normalization(op, subg);
+ break;
+ case BuiltinOperator::BuiltinOperator_LEAKY_RELU:
+ loadLeakyRelu(op, subg);
+ return;
+ case BuiltinOperator::BuiltinOperator_RANK:
+ loadRank(op, subg);
+ return;
default:
throw std::runtime_error(
std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op)));
diff --git a/runtime/onert/frontend/circle/src/circle_loader.cc b/runtime/onert/frontend/circle/src/circle_loader.cc
index 96dd4698a..92a9ee7a5 100644
--- a/runtime/onert/frontend/circle/src/circle_loader.cc
+++ b/runtime/onert/frontend/circle/src/circle_loader.cc
@@ -103,12 +103,14 @@ public:
// Set inputs
for (const std::int32_t input_ind : *circle_subg->inputs())
{
- subg->addInput(tensorIdxToOperandIdx(input_ind));
+ subg->addInput(tensorIdxToOperandIdx(input_ind),
+ _tensor_names.at(_tensor_to_operand[input_ind]));
}
// Set outputs
for (const std::int32_t output_ind : *circle_subg->outputs())
{
- subg->addOutput(tensorIdxToOperandIdx(output_ind));
+ subg->addOutput(tensorIdxToOperandIdx(output_ind),
+ _tensor_names.at(_tensor_to_operand[output_ind]));
}
// Create operations
for (const auto *op : *circle_subg->operators())
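The addInput/addOutput calls above now also pass a tensor name. The members behind those calls are not shown in this hunk; a plausible reading based only on the call sites (the types here are an assumption):

// Assumed loader members (illustrative only):
std::vector<onert::ir::OperandIndex> _tensor_to_operand;                 // flatbuffer tensor index -> IR operand index
std::unordered_map<onert::ir::OperandIndex, std::string> _tensor_names;  // IR operand index -> tensor name

// so a named subgraph input resolves as:
//   subg->addInput(tensorIdxToOperandIdx(input_ind),
//                  _tensor_names.at(_tensor_to_operand[input_ind]));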
diff --git a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc
index 8ff6cbbfd..8e3d83db4 100644
--- a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc
+++ b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc
@@ -83,6 +83,189 @@ uint32_t getUint32Scalar(Operands &operands, const OperandIndex index)
}
OperationFactory::Generator
+getElementwiseActivationGenerator(const onert::ir::operation::ElementwiseActivation::Type op_type,
+ float alpha = 0.f, float beta = 0.f)
+{
+ return [op_type, alpha, beta](const OperationFactory::Param &init_param, Operands &) {
+ assert(init_param.input_count == 1);
+ assert(init_param.output_count == 1);
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+
+ OperandIndexSequence inputs{init_param.inputs[0]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ operation::ElementwiseActivation::Param param;
+ param.op_type = op_type;
+ param.alpha = alpha;
+ param.beta = beta;
+
+ return new operation::ElementwiseActivation{inputs, outputs, param};
+ };
+}
+
+OperationFactory::Generator getElementwiseBinaryGenerator(
+ const onert::ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type)
+{
+ return [op_type](const OperationFactory::Param &init_param, Operands &) {
+ assert(init_param.input_count == 2);
+ assert(init_param.output_count == 1);
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Lefthand side operand
+ // 1 -> Righthand side operand
+
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ operation::ElementwiseBinary::Param param;
+ param.op_type = op_type;
+
+ return new operation::ElementwiseBinary{inputs, outputs, param};
+ };
+}
+
+OperationFactory::Generator
+getElementwiseUnaryGenerator(const onert::ir::operation::ElementwiseUnary::Type op_type)
+{
+ return [op_type](const OperationFactory::Param &init_param, Operands &operands) {
+ assert(init_param.input_count == 1);
+ assert(init_param.output_count == 1);
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+
+ OperandIndexSequence inputs{init_param.inputs[0]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ operation::ElementwiseUnary::Param param;
+ param.op_type = op_type;
+
+ if (op_type == operation::ElementwiseUnary::Type::CAST)
+ {
+ // NNAPI uses QUANT_UINT8_ASYMM to represent UINT8 type for ANEURALNETWORKS_CAST's
+ // input/output
+ if (operands.at(inputs.at(0)).typeInfo().type() == DataType::QUANT_UINT8_ASYMM)
+ {
+ replaceDataType(operands, inputs.at(0), DataType::UINT8);
+ }
+ if (operands.at(outputs.at(0)).typeInfo().type() == DataType::QUANT_UINT8_ASYMM)
+ {
+ replaceDataType(operands, outputs.at(0), DataType::UINT8);
+ }
+ }
+
+ return new operation::ElementwiseUnary{inputs, outputs, param};
+ };
+}
+
+OperationFactory::Generator
+getBinaryArithmeticGenerator(const onert::ir::operation::BinaryArithmetic::ArithmeticType op_type)
+{
+ return [op_type](const OperationFactory::Param &init_param, Operands &operands) {
+ assert(init_param.input_count == 3);
+ assert(init_param.output_count == 1);
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Lefthand side operand
+ // 1 -> Righthand side operand
+
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ operation::BinaryArithmetic::Param param;
+ param.arithmetic_type = op_type;
+ const auto activation_index = OperandIndex{init_param.inputs[2]};
+ param.activation =
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+
+ return new operation::BinaryArithmetic{inputs, outputs, param};
+ };
+}
+
+OperationFactory::Generator
+getPool2DGenerator(const onert::ir::operation::Pool2D::PoolType pool_type)
+{
+ return [pool_type](const OperationFactory::Param &init_param, Operands &operands) {
+ assert(init_param.input_count == 7 || init_param.input_count == 10);
+ assert(init_param.output_count == 1);
+
+ // In common
+ // 0 -> IFM Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ operation::Pool2D::Param param;
+ param.op_type = pool_type;
+ if (init_param.input_count == 7) // support implicit padding
+ {
+ // Each input should be interpreted as follows:
+ //
+ // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
+ // 2 -> Horizontal (over width) Stride Index
+ // 3 -> Vertical (over height) Stride Index
+ // 4 -> Filter Width Index
+ // 5 -> Filter Height Index
+ // 6 -> FuseCode (activation) Index
+
+ const auto padding_index = OperandIndex{init_param.inputs[1]};
+ const auto hstride_index = OperandIndex{init_param.inputs[2]};
+ const auto vstride_index = OperandIndex{init_param.inputs[3]};
+ const auto kw_index = OperandIndex{init_param.inputs[4]};
+ const auto kh_index = OperandIndex{init_param.inputs[5]};
+ const auto activation_index = OperandIndex{init_param.inputs[6]};
+
+ param.padding.type =
+ NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
+ param.stride = makeStride(operands, hstride_index, vstride_index);
+ param.kw = getUint32Scalar(operands, kw_index);
+ param.kh = operands.at(kh_index).asScalar<uint32_t>();
+ param.activation =
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ }
+ else // support explicit padding
+ {
+ // Each input should be interpreted as follows:
+ //
+ // 1 -> Padding_left index
+ // 2 -> Padding_right index
+ // 3 -> Padding_top index
+ // 4 -> Padding_bottom index
+ // 5 -> Horizontal (over width) Stride Index
+ // 6 -> Vertical (over height) Stride Index
+ // 7 -> Filter Width Index
+ // 8 -> Filter Height Index
+ // 9 -> FuseCode (activation) Index
+
+ const auto padding_left_index = OperandIndex{init_param.inputs[1]};
+ const auto padding_right_index = OperandIndex{init_param.inputs[2]};
+ const auto padding_top_index = OperandIndex{init_param.inputs[3]};
+ const auto padding_bottom_index = OperandIndex{init_param.inputs[4]};
+ const auto hstride_index = OperandIndex{init_param.inputs[5]};
+ const auto vstride_index = OperandIndex{init_param.inputs[6]};
+ const auto kw_index = OperandIndex{init_param.inputs[7]};
+ const auto kh_index = OperandIndex{init_param.inputs[8]};
+ const auto activation_index = OperandIndex{init_param.inputs[9]};
+
+ param.padding.type = PaddingType::EXPLICIT;
+ param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index,
+ padding_top_index, padding_bottom_index);
+ param.stride = makeStride(operands, hstride_index, vstride_index);
+ param.kw = getUint32Scalar(operands, kw_index);
+ param.kh = getUint32Scalar(operands, kh_index);
+ param.activation =
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ }
+ return new operation::Pool2D{inputs, outputs, param};
+ };
+}
+
+OperationFactory::Generator
getReduceGenerator(const onert::ir::operation::Reduce::ReduceType reduce_type)
{
return [reduce_type](const OperationFactory::Param &init_param, Operands &operands) {
@@ -133,79 +316,24 @@ Operation *createSimpleBinaryOp(const OperationFactory::Param &init_param, Opera
return new T{inputs, outputs};
}
-// A generator function for binary ops with no params
-template <typename T>
-Operation *createPool2DOp(const OperationFactory::Param &init_param, Operands &operands)
+OperationFactory::Generator getComparisonGenerator(operation::Comparison::ComparisonType type)
{
- assert(init_param.input_count == 7 || init_param.input_count == 10);
- assert(init_param.output_count == 1);
+ return [type](const OperationFactory::Param &init_param, Operands &) -> Operation * {
+ assert(init_param.input_count == 2 && init_param.output_count == 1);
- // In common
- // 0 -> IFM Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
- OperandIndexSequence outputs{init_param.outputs[0]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
- typename T::Param param;
- if (init_param.input_count == 7) // support implicit padding
- {
// Each input should be interpreted as follows:
//
- // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
- // 2 -> Horizontal (over width) Stride Index
- // 3 -> Vertial (over height) Stride Index
- // 4 -> Filter Width Index
- // 5 -> Filter Height Index
- // 6 -> FuseCode (activation) Index
-
- const auto padding_index = OperandIndex{init_param.inputs[1]};
- const auto hstride_index = OperandIndex{init_param.inputs[2]};
- const auto vstride_index = OperandIndex{init_param.inputs[3]};
- const auto kw_index = OperandIndex{init_param.inputs[4]};
- const auto kh_index = OperandIndex{init_param.inputs[5]};
- const auto activation_index = OperandIndex{init_param.inputs[6]};
+ // 0 -> input0 Tensor Index
+ // 1 -> input1 Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
- param.padding.type =
- NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
- param.stride = makeStride(operands, hstride_index, vstride_index);
- param.kw = getUint32Scalar(operands, kw_index);
- param.kh = operands.at(kh_index).asScalar<uint32_t>();
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
- }
- else // support explicit padding
- {
- // Each input should be interpreted as follows:
- //
- // 1 -> Padding_left index
- // 2 -> Padding_right index
- // 3 -> Padding_top index
- // 4 -> Padding_bottom index
- // 5 -> Horizontal (over width) Stride Index
- // 6 -> Vertial (over height) Stride Index
- // 7 -> Filter Width Index
- // 8 -> Filter Height Index
- // 9 -> FuseCode (activation) Index
-
- const auto padding_left_index = OperandIndex{init_param.inputs[1]};
- const auto padding_right_index = OperandIndex{init_param.inputs[2]};
- const auto padding_top_index = OperandIndex{init_param.inputs[3]};
- const auto padding_bottom_index = OperandIndex{init_param.inputs[4]};
- const auto hstride_index = OperandIndex{init_param.inputs[5]};
- const auto vstride_index = OperandIndex{init_param.inputs[6]};
- const auto kw_index = OperandIndex{init_param.inputs[7]};
- const auto kh_index = OperandIndex{init_param.inputs[8]};
- const auto activation_index = OperandIndex{init_param.inputs[9]};
-
- param.padding.type = PaddingType::EXPLICIT;
- param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index,
- padding_top_index, padding_bottom_index);
- param.stride = makeStride(operands, hstride_index, vstride_index);
- param.kw = getUint32Scalar(operands, kw_index);
- param.kh = getUint32Scalar(operands, kh_index);
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
- }
- return new T{inputs, outputs, param};
+ operation::Comparison::Param param;
+ param.comparison_type = type;
+
+ return new operation::Comparison{inputs, outputs, param};
+ };
}
} // namespace
@@ -295,9 +423,9 @@ OperationFactory::OperationFactory()
return new operation::DepthwiseConv2D{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_MAX_POOL_2D] = createPool2DOp<operation::MaxPool2D>;
+ _map[ANEURALNETWORKS_MAX_POOL_2D] = getPool2DGenerator(operation::Pool2D::PoolType::MAX);
- _map[ANEURALNETWORKS_AVERAGE_POOL_2D] = createPool2DOp<operation::AvgPool2D>;
+ _map[ANEURALNETWORKS_AVERAGE_POOL_2D] = getPool2DGenerator(operation::Pool2D::PoolType::AVG);
_map[ANEURALNETWORKS_CONCATENATION] = [](const OperationFactory::Param &init_param,
Operands &operands) {
@@ -383,27 +511,8 @@ OperationFactory::OperationFactory()
return new operation::Softmax{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_CAST] = [](const OperationFactory::Param &init_param, Operands &operands) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- // 0 -> input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- // NNAPI uses QUANT_UINT8_ASYMM to represent UINT8 type for ANEURALNETWORKS_CAST's input/output
- if (operands.at(inputs.at(0)).typeInfo().type() == DataType::QUANT_UINT8_ASYMM)
- {
- replaceDataType(operands, inputs.at(0), DataType::UINT8);
- }
- if (operands.at(outputs.at(0)).typeInfo().type() == DataType::QUANT_UINT8_ASYMM)
- {
- replaceDataType(operands, outputs.at(0), DataType::UINT8);
- }
-
- return new operation::Cast{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_CAST] =
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::CAST);
// ANEURALNETWORKS_CAST_EX is deprecated
// TODO Remove ANEURALNETWORKS_CAST_EX
@@ -416,7 +525,8 @@ OperationFactory::OperationFactory()
// inputCount is either 7 or 10 acccording to NN API specification.
// - Padding is implicit when inputCount is 7
// - Padding is explicit when inputCount is 10
- assert(init_param.input_count == 7 || init_param.input_count == 10);
+ assert(init_param.input_count == 7 || init_param.input_count == 10 ||
+ init_param.input_count == 13);
assert(init_param.output_count == 1);
// 0 -> IFM Tensor Index
@@ -427,7 +537,6 @@ OperationFactory::OperationFactory()
OperandIndexSequence outputs{init_param.outputs[0]};
Conv2D::Param param;
-
if (init_param.input_count == 7) // support implicit padding
{
// Each input should be interpreted as follows:
@@ -445,6 +554,10 @@ OperationFactory::OperationFactory()
param.padding.type =
NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
param.stride = makeStride(operands, hstride_index, vstride_index);
+
+ param.dilation.width_factor = 1;
+ param.dilation.height_factor = 1;
+
param.activation =
NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
}
@@ -472,34 +585,62 @@ OperationFactory::OperationFactory()
param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index,
padding_top_index, padding_bottom_index);
param.stride = makeStride(operands, hstride_index, vstride_index);
+
+ param.dilation.width_factor = 1;
+ param.dilation.height_factor = 1;
+
param.activation =
NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
}
+ else if (init_param.input_count == 13) // support dilation
+ {
+ // Each input should be interpreted as follows:
+ //
+ // 3 -> Padding_left Index
+ // 4 -> Padding_right Index
+ // 5 -> Padding_top Index
+ // 6 -> Padding_bottom Index
+ // 7 -> Stride (width) Index
+ // 8 -> Stride (height) Index
+ // 9 -> Activation Index
+ // 11 -> Dilation (width_factor) Index
+ // 12 -> Dilation (height_factor) Index
- return new Conv2D{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_ADD] = [](const OperationFactory::Param &init_param, Operands &operands) {
- assert(init_param.input_count == 3);
- assert(init_param.output_count == 1);
+ const auto padding_left_index = OperandIndex{init_param.inputs[3]};
+ const auto padding_right_index = OperandIndex{init_param.inputs[4]};
+ const auto padding_top_index = OperandIndex{init_param.inputs[5]};
+ const auto padding_bottom_index = OperandIndex{init_param.inputs[6]};
+ const auto hstride_index = OperandIndex{init_param.inputs[7]};
+ const auto vstride_index = OperandIndex{init_param.inputs[8]};
+ const auto activation_index = OperandIndex{init_param.inputs[9]};
+ const auto width_factor_index = OperandIndex{init_param.inputs[11]};
+ const auto height_factor_index = OperandIndex{init_param.inputs[12]};
- // Each input should be interpreted as follows:
- //
- // 0 -> Lefthand side operand
- // 1 -> Righthand side operand
+ param.padding.type = PaddingType::EXPLICIT;
+ param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index,
+ padding_top_index, padding_bottom_index);
+ param.stride = makeStride(operands, hstride_index, vstride_index);
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
- OperandIndexSequence outputs{init_param.outputs[0]};
+ auto width_factor = operands.at(width_factor_index).asScalar<int32_t>();
+ auto height_factor = operands.at(height_factor_index).asScalar<int32_t>();
- operation::Add::Param param;
+ param.dilation.width_factor = width_factor;
+ param.dilation.height_factor = height_factor;
- const auto activation_index = OperandIndex{init_param.inputs[2]};
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ param.activation =
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ }
+ else
+ {
+ throw std::runtime_error{"Conv2D: unsupported input operand count"};
+ }
- return new operation::Add{inputs, outputs, param};
+ return new Conv2D{inputs, outputs, param};
};
+ _map[ANEURALNETWORKS_ADD] =
+ getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::ADD);
+
_map[ANEURALNETWORKS_ADDV2_EX] = _map[ANEURALNETWORKS_ADD];
_map[ANEURALNETWORKS_REDUCE_SUM] =
@@ -509,26 +650,8 @@ OperationFactory::OperationFactory()
// TODO Remove ANEURALNETWORKS_REDUCE_SUM_EX
_map[ANEURALNETWORKS_REDUCE_SUM_EX] = _map[ANEURALNETWORKS_REDUCE_SUM];
- _map[ANEURALNETWORKS_SUB] = [](const OperationFactory::Param &init_param, Operands &operands) {
- assert(init_param.input_count == 3);
- assert(init_param.output_count == 1);
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Lefthand side operand
- // 1 -> Righthand side operand
-
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- operation::Sub::Param param;
-
- const auto activation_index = OperandIndex{init_param.inputs[2]};
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
-
- return new operation::Sub{inputs, outputs, param};
- };
+ _map[ANEURALNETWORKS_SUB] =
+ getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::SUB);
_map[ANEURALNETWORKS_SLICE] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 3 && init_param.output_count == 1);
@@ -611,27 +734,8 @@ OperationFactory::OperationFactory()
return new operation::Transpose{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_MUL] = [](const OperationFactory::Param &init_param, Operands &operands) {
- assert(init_param.input_count == 3 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> LHS Tensor Index
- // 1 -> RHS Tensor Index
- // 2 -> Activation Index
-
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Mul::Param param;
-
- const auto activation_index = OperandIndex{init_param.inputs[2]};
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
-
- return new operation::Mul{inputs, outputs, param};
- };
+ _map[ANEURALNETWORKS_MUL] =
+ getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::MUL);
_map[ANEURALNETWORKS_SQUEEZE] = [](const OperationFactory::Param &init_param,
Operands &operands) {
@@ -672,34 +776,18 @@ OperationFactory::OperationFactory()
return new operation::Squeeze{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_TANH] = CreateSimpleUnaryOp<operation::Tanh>;
+ _map[ANEURALNETWORKS_TANH] = getElementwiseActivationGenerator(
+ onert::ir::operation::ElementwiseActivation::Type::TANH, 1.f, 1.f);
- _map[ANEURALNETWORKS_LOG] = CreateSimpleUnaryOp<operation::Log>;
+ _map[ANEURALNETWORKS_LOG] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::LOG);
- _map[ANEURALNETWORKS_LOGISTIC] = CreateSimpleUnaryOp<operation::Logistic>;
+ _map[ANEURALNETWORKS_LOGISTIC] = getElementwiseActivationGenerator(
+ onert::ir::operation::ElementwiseActivation::Type::LOGISTIC);
- _map[ANEURALNETWORKS_DIV] = [](const OperationFactory::Param &init_param, Operands &operands) {
- assert(init_param.input_count == 3 && init_param.output_count == 1);
+ _map[ANEURALNETWORKS_DIV] =
+ getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::DIV);
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> LHS Tensor Index
- // 1 -> RHS Tensor Index
- // 2 -> Activation Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Div::Param param;
-
- const auto activation_index = OperandIndex{init_param.inputs[2]};
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
-
- return new operation::Div{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_EXP] = CreateSimpleUnaryOp<operation::Exp>;
+ _map[ANEURALNETWORKS_EXP] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::EXP);
// ANEURALNETWORKS_EXP_EX is deprecated
// TODO Remove ANEURALNETWORKS_EXP_EX
@@ -710,39 +798,17 @@ OperationFactory::OperationFactory()
// 1 -> Axis Tensor Index
_map[ANEURALNETWORKS_EXPAND_DIMS] = createSimpleBinaryOp<operation::ExpandDims>;
- _map[ANEURALNETWORKS_GREATER] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Comparison::Param param;
- param.comparison_type = operation::Comparison::ComparisonType::Greater;
-
- return new operation::Comparison{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_GREATER_EQUAL] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Comparison::Param param;
- param.comparison_type = operation::Comparison::ComparisonType::GreaterEqual;
-
- return new operation::Comparison{inputs, outputs, param};
- };
+ _map[ANEURALNETWORKS_GREATER] =
+ getComparisonGenerator(operation::Comparison::ComparisonType::Greater);
+ _map[ANEURALNETWORKS_GREATER_EQUAL] =
+ getComparisonGenerator(operation::Comparison::ComparisonType::GreaterEqual);
+ _map[ANEURALNETWORKS_LESS] = getComparisonGenerator(operation::Comparison::ComparisonType::Less);
+ _map[ANEURALNETWORKS_LESS_EQUAL] =
+ getComparisonGenerator(operation::Comparison::ComparisonType::LessEqual);
+ _map[ANEURALNETWORKS_NOT_EQUAL] =
+ getComparisonGenerator(operation::Comparison::ComparisonType::NotEqual);
+ _map[ANEURALNETWORKS_EQUAL] =
+ getComparisonGenerator(operation::Comparison::ComparisonType::Equal);
// ANEURALNETWORKS_GREATER_EQUAL_EX is deprecated
// TODO Remove ANEURALNETWORKS_GREATER_EQUAL_EX
@@ -767,40 +833,6 @@ OperationFactory::OperationFactory()
return new operation::Comparison{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_LESS] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Comparison::Param param;
- param.comparison_type = operation::Comparison::ComparisonType::Less;
-
- return new operation::Comparison{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_LESS_EQUAL] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Comparison::Param param;
- param.comparison_type = operation::Comparison::ComparisonType::LessEqual;
-
- return new operation::Comparison{inputs, outputs, param};
- };
-
// ANEURALNETWORKS_LESS_EX is deprecated
// TODO Remove ANEURALNETWORKS_LESS_EX
_map[ANEURALNETWORKS_LESS_EX] = [](const OperationFactory::Param &init_param,
@@ -837,23 +869,6 @@ OperationFactory::OperationFactory()
// TODO Remove ANEURALNETWORKS_REDUCE_MAX_EX
_map[ANEURALNETWORKS_REDUCE_MAX_EX] = _map[ANEURALNETWORKS_REDUCE_MAX];
- _map[ANEURALNETWORKS_NOT_EQUAL] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input1 Tensor Index
- // 1 -> input2 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Comparison::Param param;
- param.comparison_type = operation::Comparison::ComparisonType::NotEqual;
-
- return new operation::Comparison{inputs, outputs, param};
- };
-
// ANEURALNETWORKS_NOT_EQUAL_EX is deprecated
// TODO Remove ANEURALNETWORKS_NOT_EQUAL_EX
_map[ANEURALNETWORKS_NOT_EQUAL_EX] = [](const OperationFactory::Param &init_param,
@@ -877,7 +892,8 @@ OperationFactory::OperationFactory()
return new operation::Comparison{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_LOGICAL_AND] = createSimpleBinaryOp<operation::LogicalAnd>;
+ _map[ANEURALNETWORKS_LOGICAL_AND] = getElementwiseBinaryGenerator(
+ operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND);
// ANEURALNETWORKS_LOGICAL_AND_EX is deprecated
// TODO Remove ANEURALNETWORKS_LOGICAL_AND_EX
@@ -898,10 +914,14 @@ OperationFactory::OperationFactory()
replaceDataType(operands, inputs.at(1), DataType::BOOL8);
replaceDataType(operands, outputs.at(0), DataType::BOOL8);
- return new operation::LogicalAnd{inputs, outputs};
+ operation::ElementwiseBinary::Param param;
+ param.op_type = operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND;
+
+ return new operation::ElementwiseBinary{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_RSQRT] = CreateSimpleUnaryOp<operation::RSQRT>;
+ _map[ANEURALNETWORKS_RSQRT] =
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::RSQRT);
_map[ANEURALNETWORKS_SELECT] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 3 && init_param.output_count == 1);
@@ -937,7 +957,9 @@ OperationFactory::OperationFactory()
// TODO Remove ANEURALNETWORKS_RSQRT_EX
_map[ANEURALNETWORKS_RSQRT_EX] = _map[ANEURALNETWORKS_RSQRT];
- _map[ANEURALNETWORKS_RELU] = CreateSimpleUnaryOp<operation::ReLU>;
+ _map[ANEURALNETWORKS_RELU] =
+ getElementwiseActivationGenerator(onert::ir::operation::ElementwiseActivation::Type::RELU,
+ onert::ir::operation::ElementwiseActivation::infinity, 0);
_map[ANEURALNETWORKS_RESIZE_BILINEAR] = [](const OperationFactory::Param &init_param,
Operands &operands) {
@@ -960,9 +982,11 @@ OperationFactory::OperationFactory()
return new operation::ResizeBilinear{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_RELU1] = CreateSimpleUnaryOp<operation::ReLU1>;
+ _map[ANEURALNETWORKS_RELU1] = getElementwiseActivationGenerator(
+ onert::ir::operation::ElementwiseActivation::Type::RELU, 1.f, -1.f);
- _map[ANEURALNETWORKS_RELU6] = CreateSimpleUnaryOp<operation::ReLU6>;
+ _map[ANEURALNETWORKS_RELU6] = getElementwiseActivationGenerator(
+ onert::ir::operation::ElementwiseActivation::Type::RELU, 6.f, 0.f);
_map[ANEURALNETWORKS_REVERSE_EX] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 2 && init_param.output_count == 1);
@@ -1009,17 +1033,8 @@ OperationFactory::OperationFactory()
return new operation::RNN{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_FLOOR] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- // 0 -> input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::Floor{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_FLOOR] =
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::FLOOR);
_map[ANEURALNETWORKS_SPACE_TO_BATCH_ND] = [](const OperationFactory::Param &init_param,
Operands &) {
@@ -1059,7 +1074,7 @@ OperationFactory::OperationFactory()
return new operation::SpaceToDepth{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_L2_POOL_2D] = createPool2DOp<operation::L2Pool2D>;
+ _map[ANEURALNETWORKS_L2_POOL_2D] = getPool2DGenerator(operation::Pool2D::PoolType::L2);
_map[ANEURALNETWORKS_EMBEDDING_LOOKUP] = [](const OperationFactory::Param &init_param,
Operands &) {
@@ -1157,35 +1172,15 @@ OperationFactory::OperationFactory()
return new operation::TransposeConv{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_SQRT] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- // 0 -> input Tensor Index
-
- OperandIndexSequence inputs{init_param.inputs[0]};
- return new operation::SQRT{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_SQRT] =
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::SQRT);
// ANEURALNETWORKS_SQRT_EX is deprecated
// TODO Remove ANEURALNETWORKS_SQRT_EX
_map[ANEURALNETWORKS_SQRT_EX] = _map[ANEURALNETWORKS_SQRT];
- _map[ANEURALNETWORKS_LOGICAL_OR] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- return new operation::LogicalOr{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_LOGICAL_OR] = getElementwiseBinaryGenerator(
+ operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR);
// ANEURALNETWORKS_LOGICAL_OR_EX is deprecated
// TODO Remove ANEURALNETWORKS_LOGICAL_OR_EX
@@ -1206,10 +1201,14 @@ OperationFactory::OperationFactory()
replaceDataType(operands, inputs.at(1), DataType::BOOL8);
replaceDataType(operands, outputs.at(0), DataType::BOOL8);
- return new operation::LogicalOr{inputs, outputs};
+ operation::ElementwiseBinary::Param param;
+ param.op_type = operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR;
+
+ return new operation::ElementwiseBinary{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_LOGICAL_NOT] = CreateSimpleUnaryOp<operation::LogicalNot>;
+ _map[ANEURALNETWORKS_LOGICAL_NOT] =
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::LOGICAL_NOT);
// ANEURALNETWORKS_LOGICAL_NOT_EX is deprecated
// TODO Remove ANEURALNETWORKS_LOGICAL_NOT_EX
@@ -1228,7 +1227,10 @@ OperationFactory::OperationFactory()
replaceDataType(operands, inputs.at(0), DataType::BOOL8);
replaceDataType(operands, outputs.at(0), DataType::BOOL8);
- return new operation::LogicalNot{inputs, outputs};
+ operation::ElementwiseUnary::Param param;
+ param.op_type = operation::ElementwiseUnary::Type::LOGICAL_NOT;
+
+ return new operation::ElementwiseUnary{inputs, outputs, param};
};
_map[ANEURALNETWORKS_LSTM] = [](const OperationFactory::Param &init_param, Operands &operands) {
@@ -1306,23 +1308,6 @@ OperationFactory::OperationFactory()
return new operation::LSTM{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_EQUAL] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- operation::Comparison::Param param;
- param.comparison_type = operation::Comparison::ComparisonType::Equal;
-
- return new operation::Comparison{inputs, outputs, param};
- };
-
// ANEURALNETWORKS_EQUAL_EX is deprecated
// TODO Remove ANEURALNETWORKS_EQUAL_EX
_map[ANEURALNETWORKS_EQUAL_EX] = [](const OperationFactory::Param &init_param,
@@ -1409,13 +1394,13 @@ OperationFactory::OperationFactory()
// TODO Remove ANEURALNETWORKS_GATHER_EX
_map[ANEURALNETWORKS_GATHER_EX] = _map[ANEURALNETWORKS_GATHER];
- _map[ANEURALNETWORKS_NEG] = CreateSimpleUnaryOp<operation::Neg>;
+ _map[ANEURALNETWORKS_NEG] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::NEG);
// ANEURALNETWORKS_NEG_EX is deprecated
// TODO Remove ANEURALNETWORKS_NEG_EX
_map[ANEURALNETWORKS_NEG_EX] = _map[ANEURALNETWORKS_NEG];
- _map[ANEURALNETWORKS_ABS] = CreateSimpleUnaryOp<operation::Abs>;
+ _map[ANEURALNETWORKS_ABS] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ABS);
// ANEURALNETWORKS_ABS_EX is deprecated
// TODO Remove ANEURALNETWORKS_ABS_EX
@@ -1434,6 +1419,8 @@ OperationFactory::OperationFactory()
operation::ArgMax::Param param;
param.axis = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<std::int32_t>();
+ // NNAPI ARGMAX output type is always int32
+ param.output_type = DataType::INT32;
return new operation::ArgMax{inputs, outputs, param};
};
@@ -1442,7 +1429,8 @@ OperationFactory::OperationFactory()
// TODO Remove ANEURALNETWORKS_ARGMAX_EX
_map[ANEURALNETWORKS_ARGMAX_EX] = _map[ANEURALNETWORKS_ARGMAX];
- _map[ANEURALNETWORKS_DEQUANTIZE] = CreateSimpleUnaryOp<operation::Dequantize>;
+ _map[ANEURALNETWORKS_DEQUANTIZE] =
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::DEQUANTIZE);
_map[ANEURALNETWORKS_MEAN] = [](const OperationFactory::Param &init_param, Operands &operands) {
assert(init_param.input_count == 3 && init_param.output_count == 1);
@@ -1600,9 +1588,11 @@ OperationFactory::OperationFactory()
_map[ANEURALNETWORKS_PAD_V2] = _map[ANEURALNETWORKS_PAD];
- _map[ANEURALNETWORKS_MINIMUM] = createSimpleBinaryOp<operation::Min>;
+ _map[ANEURALNETWORKS_MINIMUM] =
+ getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MIN);
- _map[ANEURALNETWORKS_MAXIMUM] = createSimpleBinaryOp<operation::Max>;
+ _map[ANEURALNETWORKS_MAXIMUM] =
+ getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MAX);
_map[ANEURALNETWORKS_ONE_HOT_EX] = [](const OperationFactory::Param &init_param,
Operands &operands) {
@@ -1628,23 +1618,10 @@ OperationFactory::OperationFactory()
return new operation::OneHot{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_COS_EX] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence inputs{init_param.inputs[0]};
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- return new operation::Cos{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_COS_EX] =
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::COS);
- _map[ANEURALNETWORKS_SIN] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence inputs{init_param.inputs[0]};
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- return new operation::Sin{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_SIN] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::SIN);
_map[ANEURALNETWORKS_SHAPE_EX] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 1 && init_param.output_count == 1);
@@ -1658,17 +1635,8 @@ OperationFactory::OperationFactory()
_map[ANEURALNETWORKS_REDUCE_PROD] =
getReduceGenerator(onert::ir::operation::Reduce::ReduceType::PROD);
- _map[ANEURALNETWORKS_ROUND_EX] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- // 0 -> input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::Round{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_ROUND_EX] =
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ROUND);
_map[ANEURALNETWORKS_RANGE_EX] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 3 && init_param.output_count == 1);
@@ -1695,18 +1663,8 @@ OperationFactory::OperationFactory()
// 1 -> A 1-D tensor, specifying the value
_map[ANEURALNETWORKS_FILL_EX] = createSimpleBinaryOp<operation::Fill>;
- _map[ANEURALNETWORKS_ZEROS_LIKE_EX] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- // 0 -> input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::ZerosLike{inputs, outputs};
- };
-
+ _map[ANEURALNETWORKS_ZEROS_LIKE_EX] =
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ZEROS_LIKE);
// Each input should be interpreted as follows:
// 0 -> Input Tensor Index
// 1 -> Multiple Tensor Index
@@ -1845,14 +1803,8 @@ OperationFactory::OperationFactory()
return new operation::LogSoftmax{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_QUANTIZE] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence inputs{init_param.inputs[0]};
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- return new operation::Quantize{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_QUANTIZE] =
+ getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::QUANTIZE);
}
Operation *OperationFactory::create(ANeuralNetworksOperationType type,
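All of the _map entries registered above are consumed by OperationFactory::create, whose definition is cut off at this point in the diff. A minimal sketch of the dispatch side, assuming the remaining parameters mirror the Generator signature (Param plus Operands) and that unknown types are rejected:

Operation *OperationFactory::create(ANeuralNetworksOperationType type,
                                    const OperationFactory::Param &param, Operands &operands)
{
  auto it = _map.find(type);
  if (it == _map.end())
  {
    throw std::runtime_error("Unsupported operation type: " + std::to_string(type));
  }
  // Each registered Generator builds the corresponding onert IR operation
  return it->second(param, operands);
}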
diff --git a/runtime/onert/frontend/tflite/src/tflite_loader.cc b/runtime/onert/frontend/tflite/src/tflite_loader.cc
index 86c2c6bc7..7eef15717 100644
--- a/runtime/onert/frontend/tflite/src/tflite_loader.cc
+++ b/runtime/onert/frontend/tflite/src/tflite_loader.cc
@@ -90,12 +90,14 @@ public:
// Set inputs
for (const std::int32_t input_ind : *tflite_subg->inputs())
{
- subg->addInput(tensorIdxToOperandIdx(input_ind));
+ subg->addInput(tensorIdxToOperandIdx(input_ind),
+ _tensor_names.at(_tensor_to_operand[input_ind]));
}
// Set outputs
for (const std::int32_t output_ind : *tflite_subg->outputs())
{
- subg->addOutput(tensorIdxToOperandIdx(output_ind));
+ subg->addOutput(tensorIdxToOperandIdx(output_ind),
+ _tensor_names.at(_tensor_to_operand[output_ind]));
}
// Create operations
for (const auto *op : *tflite_subg->operators())
diff --git a/runtime/onert/test/core/compiler/Scheduler.cc b/runtime/onert/test/core/compiler/Scheduler.cc
index 94f51ddd6..50f3964db 100644
--- a/runtime/onert/test/core/compiler/Scheduler.cc
+++ b/runtime/onert/test/core/compiler/Scheduler.cc
@@ -22,9 +22,7 @@
#include <ir/TypeInfo.h>
#include <ir/DataType.h>
-#include <ir/operation/Add.h>
-#include <ir/operation/Sub.h>
-#include <ir/operation/Mul.h>
+#include <ir/operation/BinaryArithmetic.h>
#include <ir/operation/FullyConnected.h>
#include <gtest/gtest.h>
@@ -209,8 +207,7 @@ using OIS = OperandIndexSequence;
template <typename NodeT, typename... Types>
OperationIndex create(std::shared_ptr<Graph> graph, Types &&... args)
{
- typename NodeT::Param op_params{Activation::NONE};
- auto op = std::make_unique<NodeT>(std::forward<Types>(args)..., op_params);
+ auto op = std::make_unique<NodeT>(std::forward<Types>(args)...);
auto op_idx = graph->addOperation(std::move(op));
// For now in scheduler test all operations in tested graphs has same size (for simplicity)
assert(calcOpSize(graph, op_idx) == OPERATION_SIZE);
@@ -227,17 +224,20 @@ std::shared_ptr<Graph> createStraightGraph()
auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- create<Add>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx});
+ BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);
// Create sub node
auto sub_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- create<Sub>(graph, OIS{add_out_idx, sub_const_idx}, OIS{sub_out_idx});
+ BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{add_out_idx, sub_const_idx}, OIS{sub_out_idx}, sub_op_params);
// Create mul node
auto mul_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
auto mul_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- create<Mul>(graph, OIS{sub_out_idx, mul_const_idx}, OIS{mul_out_idx});
+ BinaryArithmetic::Param mul_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{sub_out_idx, mul_const_idx}, OIS{mul_out_idx}, mul_op_params);
graph->finishBuilding();
return graph;
@@ -261,31 +261,39 @@ std::shared_ptr<Graph> createBranchedGraph()
auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- create<Add>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx});
+ BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);
// Create mul1 node
auto mul1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
auto mul1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- create<Mul>(graph, OIS{add_out_idx, mul1_const_idx}, OIS{mul1_out_idx});
+ BinaryArithmetic::Param mul1_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{add_out_idx, mul1_const_idx}, OIS{mul1_out_idx},
+ mul1_op_params);
// Create mul2 node
auto mul2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
auto mul2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- create<Mul>(graph, OIS{mul1_out_idx, mul2_const_idx}, OIS{mul2_out_idx});
+ BinaryArithmetic::Param mul2_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{mul1_out_idx, mul2_const_idx}, OIS{mul2_out_idx},
+ mul2_op_params);
// Create fc1 node
auto fc1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
auto fc1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- create<FullyConnected>(graph, OIS{add_out_idx, fc1_const_idx}, OIS{fc1_out_idx});
+ FullyConnected::Param fc1_op_params{Activation::NONE};
+ create<FullyConnected>(graph, OIS{add_out_idx, fc1_const_idx}, OIS{fc1_out_idx}, fc1_op_params);
// Create fc2 node
auto fc2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
auto fc2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- create<FullyConnected>(graph, OIS{fc1_out_idx, fc2_const_idx}, OIS{fc2_out_idx});
+ FullyConnected::Param fc2_op_params{Activation::NONE};
+ create<FullyConnected>(graph, OIS{fc1_out_idx, fc2_const_idx}, OIS{fc2_out_idx}, fc2_op_params);
- // Create add2 node
+ // Create sub node
auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- create<Sub>(graph, OIS{mul2_out_idx, fc2_out_idx}, OIS{sub_out_idx});
+ BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{mul2_out_idx, fc2_out_idx}, OIS{sub_out_idx}, sub_op_params);
graph->finishBuilding();
return graph;
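The brace-initializations above depend on the field order of BinaryArithmetic::Param. Combined with the member accesses in the ExecInstance and ExecManager hunks below, the implied layout is as follows (the exact header contents are assumed):

// Assumed shape of onert::ir::operation::BinaryArithmetic::Param:
struct Param
{
  ArithmeticType arithmetic_type;  // ADD / SUB / MUL / DIV
  Activation activation;           // fused activation, e.g. Activation::NONE
};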
diff --git a/runtime/onert/test/core/exec/ExecInstance.cc b/runtime/onert/test/core/exec/ExecInstance.cc
index 0fcf372c3..806b47ecc 100644
--- a/runtime/onert/test/core/exec/ExecInstance.cc
+++ b/runtime/onert/test/core/exec/ExecInstance.cc
@@ -20,7 +20,7 @@
#include "ir/Graph.h"
#include "compiler/Compiler.h"
#include "exec/Execution.h"
-#include "ir/operation/Add.h"
+#include "ir/operation/BinaryArithmetic.h"
namespace
{
@@ -54,16 +54,20 @@ public:
.at(operand_rhs2)
.data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
// 2nd add operations (result2 <= result1 + rhs2)
- operation::Add::Param param1;
+ operation::BinaryArithmetic::Param param1;
+ param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
param1.activation = Activation::NONE;
auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
auto output_set1 = OperandIndexSequence{operand_result1};
- graph->addOperation(std::make_unique<operation::Add>(input_set1, output_set1, param1));
- operation::Add::Param param2;
+ graph->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
+ operation::BinaryArithmetic::Param param2;
+ param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
param2.activation = Activation::NONE;
auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
auto output_set2 = OperandIndexSequence{operand_result2};
- graph->addOperation(std::make_unique<operation::Add>(input_set2, output_set2, param2));
+ graph->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
// Identify model inputs and outputs
graph->addInput(operand_lhs);
graph->addInput(operand_rhs1);
diff --git a/runtime/onert/test/core/interp/ExecManager.cc b/runtime/onert/test/core/interp/ExecManager.cc
index 2e295ef40..09190bc58 100644
--- a/runtime/onert/test/core/interp/ExecManager.cc
+++ b/runtime/onert/test/core/interp/ExecManager.cc
@@ -21,7 +21,7 @@
#include "ir/Graph.h"
#include "interp/InterpExecutor.h"
#include "exec/Execution.h"
-#include "ir/operation/Add.h"
+#include "ir/operation/BinaryArithmetic.h"
namespace
{
@@ -57,11 +57,13 @@ protected:
// Add operations
- operation::Add::Param param;
+ operation::BinaryArithmetic::Param param;
+ param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
param.activation = Activation::NONE;
auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
auto output_set = OperandIndexSequence{operand_result};
- _graph->addOperation(std::make_unique<operation::Add>(input_set, output_set, param));
+ _graph->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
// Identify model inputs and outputs
@@ -112,17 +114,21 @@ protected:
// 2nd add operations (result2 <= result1 + rhs2)
- operation::Add::Param param1;
+ operation::BinaryArithmetic::Param param1;
+ param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
param1.activation = Activation::NONE;
auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
auto output_set1 = OperandIndexSequence{operand_result1};
- _graph->addOperation(std::make_unique<operation::Add>(input_set1, output_set1, param1));
+ _graph->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
- operation::Add::Param param2;
+ operation::BinaryArithmetic::Param param2;
+ param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
param2.activation = Activation::NONE;
auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
auto output_set2 = OperandIndexSequence{operand_result2};
- _graph->addOperation(std::make_unique<operation::Add>(input_set2, output_set2, param2));
+ _graph->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
// Identify model inputs and outputs
@@ -170,11 +176,13 @@ protected:
// Add operations
- operation::Add::Param param;
+ operation::BinaryArithmetic::Param param;
+ param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
param.activation = Activation::NONE;
auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
auto output_set = OperandIndexSequence{operand_result};
- _graph->addOperation(std::make_unique<operation::Add>(input_set, output_set, param));
+ _graph->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
// Identify model inputs and outputs
diff --git a/runtime/onert/test/util/ShapeInference.cc b/runtime/onert/test/util/ShapeInference.cc
index 63a948d7b..aab33fab5 100644
--- a/runtime/onert/test/util/ShapeInference.cc
+++ b/runtime/onert/test/util/ShapeInference.cc
@@ -47,8 +47,9 @@ TEST(ShapeInference, Pool2DNodeSame)
Stride stride{3, 7};
Padding padding{PaddingType::SAME};
- operation::AvgPool2D::Param avg_pool_param{3, 6, stride, padding, Activation::NONE};
- auto infered_out_shape = onert::shape_inference::inferAvgPoolShape(in_shape, avg_pool_param);
+ operation::Pool2D::Param avg_pool_param{
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
ASSERT_EQ(infered_out_shape.rank(), 4);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
@@ -56,8 +57,9 @@ TEST(ShapeInference, Pool2DNodeSame)
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
- operation::MaxPool2D::Param max_pool_param{3, 6, stride, padding, Activation::NONE};
- infered_out_shape = onert::shape_inference::inferMaxPoolShape(in_shape, max_pool_param);
+ operation::Pool2D::Param max_pool_param{
+ operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+ infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
ASSERT_EQ(infered_out_shape.rank(), 4);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
@@ -72,8 +74,9 @@ TEST(ShapeInference, Pool2DNodeValid)
Stride stride{3, 7};
Padding padding{PaddingType::VALID};
- operation::AvgPool2D::Param avg_pool_param{3, 6, stride, padding, Activation::NONE};
- auto infered_out_shape = onert::shape_inference::inferAvgPoolShape(in_shape, avg_pool_param);
+ operation::Pool2D::Param avg_pool_param{
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
ASSERT_EQ(infered_out_shape.rank(), 4);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
@@ -81,8 +84,9 @@ TEST(ShapeInference, Pool2DNodeValid)
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
- operation::MaxPool2D::Param max_pool_param{3, 6, stride, padding, Activation::NONE};
- infered_out_shape = onert::shape_inference::inferMaxPoolShape(in_shape, max_pool_param);
+ operation::Pool2D::Param max_pool_param{
+ operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+ infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
ASSERT_EQ(infered_out_shape.rank(), 4);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
@@ -98,8 +102,9 @@ TEST(ShapeInference, Pool2DNodeExplicit)
Stride stride{3, 7};
Padding padding{4, 3, 2, 1};
- operation::AvgPool2D::Param avg_pool_param{3, 6, stride, padding, Activation::NONE};
- auto infered_out_shape = onert::shape_inference::inferAvgPoolShape(in_shape, avg_pool_param);
+ operation::Pool2D::Param avg_pool_param{
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
ASSERT_EQ(infered_out_shape.rank(), 4);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
@@ -107,8 +112,9 @@ TEST(ShapeInference, Pool2DNodeExplicit)
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
- operation::MaxPool2D::Param max_pool_param{3, 6, stride, padding, Activation::NONE};
- infered_out_shape = onert::shape_inference::inferMaxPoolShape(in_shape, max_pool_param);
+ operation::Pool2D::Param max_pool_param{
+ operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+ infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
ASSERT_EQ(infered_out_shape.rank(), 4);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
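A minimal sketch of the consolidated pooling parameters these tests now build, assuming the same stride, padding, and in_shape values as above; average and max pooling differ only in the leading PoolType field, and a single inferPoolShape helper serves both:

// Sketch only: the 3x6 kernel matches the values used in the tests above.
operation::Pool2D::Param pool_param{
    operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
auto pooled_shape = onert::shape_inference::inferPoolShape(in_shape, pool_param);
// Re-running with PoolType::MAX in place of PoolType::AVG yields the max-pool
// shape through the same call.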
@@ -122,7 +128,8 @@ TEST(ShapeInference, Conv2D)
Shape in_shape{10, 6, 12, 20};
Shape ker_shape{30, 3, 6, 20};
- operation::Conv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, Activation::NONE};
+ operation::Conv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, Activation::NONE,
+ Dilation{1, 1}};
auto infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
ASSERT_EQ(infered_out_shape.rank(), 4);
@@ -131,7 +138,8 @@ TEST(ShapeInference, Conv2D)
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
- param = operation::Conv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, Activation::NONE};
+ param = operation::Conv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, Activation::NONE,
+ Dilation{1, 1}};
infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
ASSERT_EQ(infered_out_shape.rank(), 4);
@@ -140,7 +148,8 @@ TEST(ShapeInference, Conv2D)
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
- param = operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE};
+ param =
+ operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE, Dilation{1, 1}};
infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
ASSERT_EQ(infered_out_shape.rank(), 4);
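With a dilation rate d, the effective kernel extent becomes (k - 1) * d + 1, so the Dilation{1, 1} added to every Param above keeps these expectations identical to the pre-dilation API. A minimal sketch of a non-trivial rate, assuming the aggregate field order shown above and that inferConv2DShape honours the dilation fields:

// Sketch only: a 2x2 dilation enlarges the effective kernel, so with VALID padding
// the inferred output would be smaller than with Dilation{1, 1}.
param = operation::Conv2D::Param{Stride{3, 7}, Padding{PaddingType::VALID}, Activation::NONE,
                                 Dilation{2, 2}};
infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);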