Diffstat (limited to 'runtime/onert/core/src')
-rw-r--r--runtime/onert/core/src/backend/BackendContext.cc30
-rw-r--r--runtime/onert/core/src/backend/IConstantInitializer.cc112
-rw-r--r--runtime/onert/core/src/backend/IPortableTensor.cc (renamed from runtime/onert/core/src/backend/controlflow/Tensor.h)14
-rw-r--r--runtime/onert/core/src/backend/ITensor.cc11
-rw-r--r--runtime/onert/core/src/backend/basic/Allocator.cc (renamed from runtime/onert/core/src/backend/cpu_common/Allocator.cc)6
-rw-r--r--runtime/onert/core/src/backend/basic/BackendContextHelpers.cc17
-rw-r--r--runtime/onert/core/src/backend/basic/DynamicTensorManager.cc53
-rw-r--r--runtime/onert/core/src/backend/basic/MemoryManager.cc (renamed from runtime/onert/core/src/backend/cpu_common/MemoryManager.cc)35
-rw-r--r--runtime/onert/core/src/backend/basic/MemoryPlanner.cc (renamed from runtime/onert/core/src/backend/cpu_common/MemoryPlanner.cc)35
-rw-r--r--runtime/onert/core/src/backend/basic/MemoryPlanner.h (renamed from runtime/onert/core/src/backend/cpu_common/MemoryPlanner.h)14
-rw-r--r--runtime/onert/core/src/backend/basic/MemoryPlanner.test.cc (renamed from runtime/onert/core/src/backend/cpu_common/MemoryPlanner.test.cc)8
-rw-r--r--runtime/onert/core/src/backend/basic/MemoryPlannerFactory.cc (renamed from runtime/onert/core/src/backend/cpu_common/MemoryPlannerFactory.cc)4
-rw-r--r--runtime/onert/core/src/backend/basic/MemoryPlannerFactory.h (renamed from runtime/onert/core/src/backend/cpu_common/MemoryPlannerFactory.h)12
-rw-r--r--runtime/onert/core/src/backend/basic/StaticTensorManager.cc (renamed from runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc)59
-rw-r--r--runtime/onert/core/src/backend/basic/Tensor.cc121
-rw-r--r--runtime/onert/core/src/backend/basic/TensorBuilder.cc91
-rw-r--r--runtime/onert/core/src/backend/basic/train/TrainableTensor.cc (renamed from runtime/onert/core/src/backend/cpu_common/Tensor.cc)32
-rw-r--r--runtime/onert/core/src/backend/builtin/Backend.h (renamed from runtime/onert/core/src/backend/controlflow/Backend.h)53
-rw-r--r--runtime/onert/core/src/backend/builtin/BackendContext.cc58
-rw-r--r--runtime/onert/core/src/backend/builtin/BackendContext.h71
-rw-r--r--runtime/onert/core/src/backend/builtin/Config.cc (renamed from runtime/onert/core/src/backend/controlflow/Config.cc)8
-rw-r--r--runtime/onert/core/src/backend/builtin/Config.h (renamed from runtime/onert/core/src/backend/controlflow/Config.h)12
-rw-r--r--runtime/onert/core/src/backend/builtin/ConstantInitializer.h (renamed from runtime/onert/core/src/backend/controlflow/UserTensor.cc)23
-rw-r--r--runtime/onert/core/src/backend/builtin/DynamicTensorManager.h38
-rw-r--r--runtime/onert/core/src/backend/builtin/ExternalContext.h79
-rw-r--r--runtime/onert/core/src/backend/builtin/IOTensor.cc56
-rw-r--r--runtime/onert/core/src/backend/builtin/IOTensor.h97
-rw-r--r--runtime/onert/core/src/backend/builtin/KernelGenerator.cc159
-rw-r--r--runtime/onert/core/src/backend/builtin/KernelGenerator.h (renamed from runtime/onert/core/src/backend/controlflow/KernelGenerator.h)50
-rw-r--r--runtime/onert/core/src/backend/builtin/Tensor.h (renamed from runtime/onert/core/src/backend/controlflow/UserTensorRegistry.h)16
-rw-r--r--runtime/onert/core/src/backend/builtin/TensorBuilder.cc (renamed from runtime/onert/core/src/backend/controlflow/TensorBuilder.cc)48
-rw-r--r--runtime/onert/core/src/backend/builtin/TensorBuilder.h (renamed from runtime/onert/core/src/backend/controlflow/TensorBuilder.h)46
-rw-r--r--runtime/onert/core/src/backend/builtin/TensorRegistry.h134
-rw-r--r--runtime/onert/core/src/backend/builtin/UserTensor.cc53
-rw-r--r--runtime/onert/core/src/backend/builtin/UserTensor.h (renamed from runtime/onert/core/src/backend/controlflow/UserTensor.h)28
-rw-r--r--runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc80
-rw-r--r--runtime/onert/core/src/backend/builtin/kernel/IfLayer.h (renamed from runtime/onert/core/src/backend/controlflow/kernel/IfLayer.h)39
-rw-r--r--runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc316
-rw-r--r--runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h150
-rw-r--r--runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc151
-rw-r--r--runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h (renamed from runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.h)39
-rw-r--r--runtime/onert/core/src/backend/builtin/train/BackendContext.cc78
-rw-r--r--runtime/onert/core/src/backend/builtin/train/BackendContext.h76
-rw-r--r--runtime/onert/core/src/backend/builtin/train/KernelGenerator.cc98
-rw-r--r--runtime/onert/core/src/backend/builtin/train/KernelGenerator.h75
-rw-r--r--runtime/onert/core/src/backend/builtin/train/Tensor.h40
-rw-r--r--runtime/onert/core/src/backend/builtin/train/TensorRegistry.h132
-rw-r--r--runtime/onert/core/src/backend/builtin/train/kernel/PermuteLayer.cc85
-rw-r--r--runtime/onert/core/src/backend/builtin/train/kernel/PermuteLayer.h60
-rw-r--r--runtime/onert/core/src/backend/controlflow/ConstantInitializer.h52
-rw-r--r--runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc144
-rw-r--r--runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h72
-rw-r--r--runtime/onert/core/src/backend/controlflow/KernelGenerator.cc171
-rw-r--r--runtime/onert/core/src/backend/controlflow/TensorRegistry.h134
-rw-r--r--runtime/onert/core/src/backend/controlflow/kernel/IfLayer.cc128
-rw-r--r--runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc82
-rw-r--r--runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.h77
-rw-r--r--runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.cc216
-rw-r--r--runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc137
-rw-r--r--runtime/onert/core/src/compiler/BackendManager.cc130
-rw-r--r--runtime/onert/core/src/compiler/Compiler.cc341
-rw-r--r--runtime/onert/core/src/compiler/CompilerFactory.cc58
-rw-r--r--runtime/onert/core/src/compiler/CompilerHelpers.h52
-rw-r--r--runtime/onert/core/src/compiler/CompilerOptions.cc146
-rw-r--r--runtime/onert/core/src/compiler/ExecutorFactory.cc1037
-rw-r--r--runtime/onert/core/src/compiler/ExecutorFactory.h82
-rw-r--r--runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc152
-rw-r--r--runtime/onert/core/src/compiler/Fp32ToFp16Converter.h8
-rw-r--r--runtime/onert/core/src/compiler/HEScheduler.cc106
-rw-r--r--runtime/onert/core/src/compiler/HEScheduler.h50
-rw-r--r--runtime/onert/core/src/compiler/HEScheduler.test.cc572
-rw-r--r--runtime/onert/core/src/compiler/Linear.cc201
-rw-r--r--runtime/onert/core/src/compiler/Linear.h20
-rw-r--r--runtime/onert/core/src/compiler/LoweredGraph.cc578
-rw-r--r--runtime/onert/core/src/compiler/ManualScheduler.cc33
-rw-r--r--runtime/onert/core/src/compiler/ManualScheduler.h4
-rw-r--r--runtime/onert/core/src/compiler/MultiModelCompiler.cc242
-rw-r--r--runtime/onert/core/src/compiler/MultiModelCompiler.h69
-rw-r--r--runtime/onert/core/src/compiler/OperationLowerInfo.cc (renamed from runtime/onert/core/src/ir/operation/LowerInfo.cc)13
-rw-r--r--runtime/onert/core/src/compiler/OperationValidator.cc1053
-rw-r--r--runtime/onert/core/src/compiler/ParamChecker.h73
-rw-r--r--runtime/onert/core/src/compiler/PermuteFactor.cc28
-rw-r--r--runtime/onert/core/src/compiler/ShapeValidator.cc1082
-rw-r--r--runtime/onert/core/src/compiler/ShapeValidator.h (renamed from runtime/onert/core/src/compiler/OperationValidator.h)22
-rw-r--r--runtime/onert/core/src/compiler/StaticShapeInference.cc1096
-rw-r--r--runtime/onert/core/src/compiler/StaticShapeInferer.cc1425
-rw-r--r--runtime/onert/core/src/compiler/TensorBuilders.h78
-rw-r--r--runtime/onert/core/src/compiler/TensorRegistries.h34
-rw-r--r--runtime/onert/core/src/compiler/pass/ConstantInsertionPass.cc39
-rw-r--r--runtime/onert/core/src/compiler/pass/ConstantInsertionPass.h9
-rw-r--r--runtime/onert/core/src/compiler/pass/ConstantLoweringPass.cc23
-rw-r--r--runtime/onert/core/src/compiler/pass/ConstantLoweringPass.h2
-rw-r--r--runtime/onert/core/src/compiler/pass/ConstantOutputPass.cc68
-rw-r--r--runtime/onert/core/src/compiler/pass/ConstantOutputPass.h63
-rw-r--r--runtime/onert/core/src/compiler/pass/IPass.h41
-rw-r--r--runtime/onert/core/src/compiler/pass/LoweredOperandPass.h8
-rw-r--r--runtime/onert/core/src/compiler/pass/LoweredOperationPass.h10
-rw-r--r--runtime/onert/core/src/compiler/pass/OddOutputPass.cc90
-rw-r--r--runtime/onert/core/src/compiler/pass/OddOutputPass.h89
-rw-r--r--runtime/onert/core/src/compiler/pass/OperandPass.cc2
-rw-r--r--runtime/onert/core/src/compiler/pass/OperationPass.cc4
-rw-r--r--runtime/onert/core/src/compiler/pass/OperationPass.h4
-rw-r--r--runtime/onert/core/src/compiler/pass/Pass.h6
-rw-r--r--runtime/onert/core/src/compiler/pass/PassRunner.cc45
-rw-r--r--runtime/onert/core/src/compiler/pass/PassRunner.h53
-rw-r--r--runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc102
-rw-r--r--runtime/onert/core/src/compiler/pass/PermutationEliminationPass.h4
-rw-r--r--runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc101
-rw-r--r--runtime/onert/core/src/compiler/pass/PermutationInsertionPass.h4
-rw-r--r--runtime/onert/core/src/compiler/pass/PermutationOperationPass.cc137
-rw-r--r--runtime/onert/core/src/compiler/pass/PermutationOperationPass.h3
-rw-r--r--runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.cc64
-rw-r--r--runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.h54
-rw-r--r--runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.test.cc47
-rw-r--r--runtime/onert/core/src/compiler/train/LoweredTrainableGraph.cc285
-rw-r--r--runtime/onert/core/src/compiler/train/StaticDerivativeShapeInferer.cc150
-rw-r--r--runtime/onert/core/src/compiler/train/StaticDerivativeShapeInferer.h80
-rw-r--r--runtime/onert/core/src/compiler/train/TensorRegistries.h105
-rw-r--r--runtime/onert/core/src/compiler/train/TrainableOperationConverter.cc86
-rw-r--r--runtime/onert/core/src/compiler/train/TrainableOperationConverter.h57
-rw-r--r--runtime/onert/core/src/compiler/train/TrainingCompiler.cc299
-rw-r--r--runtime/onert/core/src/compiler/train/TrainingCompiler.h83
-rw-r--r--runtime/onert/core/src/compiler/train/UntrainableOperationConverter.cc53
-rw-r--r--runtime/onert/core/src/compiler/train/UntrainableOperationConverter.h52
-rw-r--r--runtime/onert/core/src/compiler/train/pass/LossInsertionPass.cc77
-rw-r--r--runtime/onert/core/src/compiler/train/pass/LossInsertionPass.h55
-rw-r--r--runtime/onert/core/src/compiler/train/pass/Pass.h64
-rw-r--r--runtime/onert/core/src/dumper/dot/DotBuilder.cc23
-rw-r--r--runtime/onert/core/src/dumper/dot/DotBuilder.h2
-rw-r--r--runtime/onert/core/src/dumper/dot/DotDumper.cc239
-rw-r--r--runtime/onert/core/src/dumper/dot/DotDumper.h27
-rw-r--r--runtime/onert/core/src/dumper/dot/DotSubgraphInfo.cc58
-rw-r--r--runtime/onert/core/src/dumper/dot/DotSubgraphInfo.h61
-rw-r--r--runtime/onert/core/src/dumper/dot/OperandNode.cc5
-rw-r--r--runtime/onert/core/src/dumper/dot/OperandNode.h1
-rw-r--r--runtime/onert/core/src/dumper/dot/OperationNode.cc5
-rw-r--r--runtime/onert/core/src/dumper/dot/OperationNode.h4
-rw-r--r--runtime/onert/core/src/dumper/h5/Dumper.cc (renamed from runtime/onert/core/src/compiler/ParamChecker.cc)21
-rw-r--r--runtime/onert/core/src/dumper/h5/Dumper.h51
-rw-r--r--runtime/onert/core/src/dumper/h5/MinMaxDumper.cc75
-rw-r--r--runtime/onert/core/src/dumper/h5/MinMaxDumper.h70
-rw-r--r--runtime/onert/core/src/dumper/text/GraphDumper.cc110
-rw-r--r--runtime/onert/core/src/dumper/text/GraphDumper.h66
-rw-r--r--runtime/onert/core/src/exec/DataflowExecutor.cc87
-rw-r--r--runtime/onert/core/src/exec/DataflowExecutor.h27
-rw-r--r--runtime/onert/core/src/exec/DynamicShapeInferer.cc (renamed from runtime/onert/core/src/exec/DynamicShapeInference.cc)445
-rw-r--r--runtime/onert/core/src/exec/ExecTime.cc6
-rw-r--r--runtime/onert/core/src/exec/ExecTime.h4
-rw-r--r--runtime/onert/core/src/exec/ExecTime.test.cc106
-rw-r--r--runtime/onert/core/src/exec/Execution.cc93
-rw-r--r--runtime/onert/core/src/exec/Execution.test.cc635
-rw-r--r--runtime/onert/core/src/exec/ExecutionObservee.cc28
-rw-r--r--runtime/onert/core/src/exec/ExecutionObservee.h14
-rw-r--r--runtime/onert/core/src/exec/ExecutionObservers.cc131
-rw-r--r--runtime/onert/core/src/exec/ExecutionObservers.h58
-rw-r--r--runtime/onert/core/src/exec/ExecutorBase.cc222
-rw-r--r--runtime/onert/core/src/exec/ExecutorBase.h68
-rw-r--r--runtime/onert/core/src/exec/Executors.cc649
-rw-r--r--runtime/onert/core/src/exec/Executors.h169
-rw-r--r--runtime/onert/core/src/exec/FunctionSequence.cc28
-rw-r--r--runtime/onert/core/src/exec/IPermuteFunction.cc320
-rw-r--r--runtime/onert/core/src/exec/IPermuteFunction.h378
-rw-r--r--runtime/onert/core/src/exec/IPermuteFunction.test.cc902
-rw-r--r--runtime/onert/core/src/exec/JSONExecTime.cc6
-rw-r--r--runtime/onert/core/src/exec/JSONExecTime.h18
-rw-r--r--runtime/onert/core/src/exec/LinearExecutor.cc61
-rw-r--r--runtime/onert/core/src/exec/LinearExecutor.h21
-rw-r--r--runtime/onert/core/src/exec/MinMaxRecorder.cc112
-rw-r--r--runtime/onert/core/src/exec/MinMaxRecorder.h56
-rw-r--r--runtime/onert/core/src/exec/ParallelExecutor.cc54
-rw-r--r--runtime/onert/core/src/exec/ParallelExecutor.h22
-rw-r--r--runtime/onert/core/src/exec/ParallelScheduler.cc4
-rw-r--r--runtime/onert/core/src/exec/SingleModelExecutors.cc61
-rw-r--r--runtime/onert/core/src/exec/SingleModelExecutors.h70
-rw-r--r--runtime/onert/core/src/exec/Sink.h199
-rw-r--r--runtime/onert/core/src/exec/Source.h208
-rw-r--r--runtime/onert/core/src/exec/ThreadPool.cc2
-rw-r--r--runtime/onert/core/src/exec/feature/MockTensor.h66
-rw-r--r--runtime/onert/core/src/exec/feature/nchw/Reader.h42
-rw-r--r--runtime/onert/core/src/exec/feature/nchw/Reader.test.cc85
-rw-r--r--runtime/onert/core/src/exec/feature/nchw/View.h4
-rw-r--r--runtime/onert/core/src/exec/feature/nchw/View.test.cc85
-rw-r--r--runtime/onert/core/src/exec/feature/nhwc/Reader.h41
-rw-r--r--runtime/onert/core/src/exec/feature/nhwc/Reader.test.cc86
-rw-r--r--runtime/onert/core/src/exec/feature/nhwc/View.h8
-rw-r--r--runtime/onert/core/src/exec/feature/nhwc/View.test.cc86
-rw-r--r--runtime/onert/core/src/exec/train/TrainableExecutor.cc204
-rw-r--r--runtime/onert/core/src/exec/train/TrainableExecutor.h109
-rw-r--r--runtime/onert/core/src/exec/train/TrainableExecutors.cc89
-rw-r--r--runtime/onert/core/src/exec/train/TrainableExecutors.h92
-rw-r--r--runtime/onert/core/src/exec/train/TrainableFnSequence.cc67
-rw-r--r--runtime/onert/core/src/exec/train/optimizer/OptimizerCode.cc42
-rw-r--r--runtime/onert/core/src/exec/train/optimizer/OptimizerHelpers.h47
-rw-r--r--runtime/onert/core/src/exec/train/optimizer/SGD.cc66
-rw-r--r--runtime/onert/core/src/interp/Buffer.h91
-rw-r--r--runtime/onert/core/src/interp/ExecEnv.h212
-rw-r--r--runtime/onert/core/src/interp/InterpExecutor.cc126
-rw-r--r--runtime/onert/core/src/interp/InterpExecutor.h70
-rw-r--r--runtime/onert/core/src/interp/InterpOps.lst73
-rw-r--r--runtime/onert/core/src/interp/Interpreter.cc184
-rw-r--r--runtime/onert/core/src/interp/Interpreter.h64
-rw-r--r--runtime/onert/core/src/interp/Registration.h43
-rw-r--r--runtime/onert/core/src/interp/Tensor.cc53
-rw-r--r--runtime/onert/core/src/interp/Tensor.h184
-rw-r--r--runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc205
-rw-r--r--runtime/onert/core/src/interp/operations/Concat.cc147
-rw-r--r--runtime/onert/core/src/interp/operations/Conv2D.cc151
-rw-r--r--runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc156
-rw-r--r--runtime/onert/core/src/interp/operations/ElementwiseActivations.cc161
-rw-r--r--runtime/onert/core/src/interp/operations/FullyConnected.cc136
-rw-r--r--runtime/onert/core/src/interp/operations/Gather.cc138
-rw-r--r--runtime/onert/core/src/interp/operations/InstanceNorm.cc121
-rw-r--r--runtime/onert/core/src/interp/operations/OperationUtil.h203
-rw-r--r--runtime/onert/core/src/interp/operations/Pad.cc106
-rw-r--r--runtime/onert/core/src/interp/operations/Pool2D.cc140
-rw-r--r--runtime/onert/core/src/interp/operations/Reshape.cc63
-rw-r--r--runtime/onert/core/src/interp/operations/Softmax.cc123
-rw-r--r--runtime/onert/core/src/interp/operations/TransposeConv.cc141
-rw-r--r--runtime/onert/core/src/ir/DataType.cc6
-rw-r--r--runtime/onert/core/src/ir/Graph.cc174
-rw-r--r--runtime/onert/core/src/ir/Graph.test.cc147
-rw-r--r--runtime/onert/core/src/ir/GraphIterator.cc121
-rw-r--r--runtime/onert/core/src/ir/GraphIterator.h90
-rw-r--r--runtime/onert/core/src/ir/LayoutSet.cc8
-rw-r--r--runtime/onert/core/src/ir/LayoutSet.h1
-rw-r--r--runtime/onert/core/src/ir/LayoutSet.test.cc67
-rw-r--r--runtime/onert/core/src/ir/MockNode.h47
-rw-r--r--runtime/onert/core/src/ir/OpSequence.cc95
-rw-r--r--runtime/onert/core/src/ir/OpSequences.cc124
-rw-r--r--runtime/onert/core/src/ir/Operand.cc6
-rw-r--r--runtime/onert/core/src/ir/Operand.test.cc86
-rw-r--r--runtime/onert/core/src/ir/OperandIndexSequence.cc13
-rw-r--r--runtime/onert/core/src/ir/OperandIndexSequence.test.cc52
-rw-r--r--runtime/onert/core/src/ir/Operands.cc2
-rw-r--r--runtime/onert/core/src/ir/Operands.test.cc45
-rw-r--r--runtime/onert/core/src/ir/Operation.cc21
-rw-r--r--runtime/onert/core/src/ir/Operation.test.cc98
-rw-r--r--runtime/onert/core/src/ir/OperationCloner.cc26
-rw-r--r--runtime/onert/core/src/ir/OperationCloner.h14
-rw-r--r--runtime/onert/core/src/ir/OperationDumper.cc280
-rw-r--r--runtime/onert/core/src/ir/OperationDumper.h6
-rw-r--r--runtime/onert/core/src/ir/OperationValidator.cc545
-rw-r--r--runtime/onert/core/src/ir/OperationValidator.h101
-rw-r--r--runtime/onert/core/src/ir/Operations.cc9
-rw-r--r--runtime/onert/core/src/ir/Operations.test.cc42
-rw-r--r--runtime/onert/core/src/ir/Padding.cc10
-rw-r--r--runtime/onert/core/src/ir/Shape.cc41
-rw-r--r--runtime/onert/core/src/ir/Shape.test.cc58
-rw-r--r--runtime/onert/core/src/ir/TypeInfo.cc2
-rw-r--r--runtime/onert/core/src/ir/operation/AddN.cc36
-rw-r--r--runtime/onert/core/src/ir/operation/ArgMinMax.cc (renamed from runtime/onert/core/src/ir/operation/ArgMax.cc)13
-rw-r--r--runtime/onert/core/src/ir/operation/BCQFullyConnected.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/BCQGather.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/BatchMatMul.cc2
-rw-r--r--runtime/onert/core/src/ir/operation/BatchToSpaceND.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/BinaryArithmetic.cc14
-rw-r--r--runtime/onert/core/src/ir/operation/BroadcastTo.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Bulk.cc36
-rw-r--r--runtime/onert/core/src/ir/operation/Comparison.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Concat.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Conv2D.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/ConvertFp16ToFp32.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/ConvertFp32ToFp16.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Custom.cc2
-rw-r--r--runtime/onert/core/src/ir/operation/DepthToSpace.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/DepthwiseConv2D.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/DetectionPostProcess.cc37
-rw-r--r--runtime/onert/core/src/ir/operation/Einsum.cc2
-rw-r--r--runtime/onert/core/src/ir/operation/ElementwiseActivation.cc30
-rw-r--r--runtime/onert/core/src/ir/operation/ElementwiseBinary.cc15
-rw-r--r--runtime/onert/core/src/ir/operation/ElementwiseUnary.cc42
-rw-r--r--runtime/onert/core/src/ir/operation/EmbeddingLookup.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/ExpandDims.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Fill.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/FullyConnected.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/FusedBatchNorm.cc2
-rw-r--r--runtime/onert/core/src/ir/operation/Gather.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/HashtableLookup.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/If.cc2
-rw-r--r--runtime/onert/core/src/ir/operation/InstanceNorm.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/L2Normalization.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/LSTM.cc13
-rw-r--r--runtime/onert/core/src/ir/operation/LocalResponseNormalization.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/LogSoftmax.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Loss.cc52
-rw-r--r--runtime/onert/core/src/ir/operation/MatrixBandPart.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/OneHot.cc2
-rw-r--r--runtime/onert/core/src/ir/operation/PReLU.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Pack.cc2
-rw-r--r--runtime/onert/core/src/ir/operation/Pad.cc2
-rw-r--r--runtime/onert/core/src/ir/operation/Permute.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Pool2D.cc12
-rw-r--r--runtime/onert/core/src/ir/operation/Pow.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/RNN.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Range.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Rank.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Reduce.cc20
-rw-r--r--runtime/onert/core/src/ir/operation/Reshape.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/ResizeBilinear.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Reverse.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Select.cc2
-rw-r--r--runtime/onert/core/src/ir/operation/Shape.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Slice.cc2
-rw-r--r--runtime/onert/core/src/ir/operation/Softmax.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/SpaceToBatchND.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/SpaceToDepth.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Split.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/SplitV.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/SquaredDifference.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Squeeze.cc2
-rw-r--r--runtime/onert/core/src/ir/operation/StatelessRandomUniform.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/StridedSlice.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Tile.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/TopKV2.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Transpose.cc8
-rw-r--r--runtime/onert/core/src/ir/operation/TransposeConv.cc5
-rw-r--r--runtime/onert/core/src/ir/operation/Unpack.cc3
-rw-r--r--runtime/onert/core/src/ir/operation/While.cc3
-rw-r--r--runtime/onert/core/src/ir/train/TrainableGraph.cc145
-rw-r--r--runtime/onert/core/src/ir/train/operation/Conv2D.cc49
-rw-r--r--runtime/onert/core/src/ir/train/operation/ElementwiseActivation.cc49
-rw-r--r--runtime/onert/core/src/ir/train/operation/FullyConnected.cc49
-rw-r--r--runtime/onert/core/src/ir/train/operation/Loss.cc48
-rw-r--r--runtime/onert/core/src/ir/train/operation/Permute.cc50
-rw-r--r--runtime/onert/core/src/ir/train/operation/Pool2D.cc49
-rw-r--r--runtime/onert/core/src/ir/train/operation/Reshape.cc49
-rw-r--r--runtime/onert/core/src/ir/train/operation/Softmax.cc49
-rw-r--r--runtime/onert/core/src/ir/verifier/Verifier.cc54
-rw-r--r--runtime/onert/core/src/ir/verifier/Verifier.h11
-rw-r--r--runtime/onert/core/src/ir/verifier/Verifier.test.cc93
-rw-r--r--runtime/onert/core/src/odc/QuantizeManager.cc50
-rw-r--r--runtime/onert/core/src/odc/QuantizeManager.test.cc (renamed from runtime/onert/core/src/util/GeneralConfigSource.cc)37
-rw-r--r--runtime/onert/core/src/odc/QuantizerLoader.cc104
-rw-r--r--runtime/onert/core/src/odc/QuantizerLoader.h89
-rw-r--r--runtime/onert/core/src/odc/QuantizerLoader.test.cc63
-rw-r--r--runtime/onert/core/src/util/ChromeTracingEventWriter.cc195
-rw-r--r--runtime/onert/core/src/util/ConfigSource.cc36
-rw-r--r--runtime/onert/core/src/util/EventCollector.cc77
-rw-r--r--runtime/onert/core/src/util/EventCollector.h70
-rw-r--r--runtime/onert/core/src/util/EventCollectorGlobal.cc93
-rw-r--r--runtime/onert/core/src/util/EventCollectorGlobal.h155
-rw-r--r--runtime/onert/core/src/util/EventRecorder.cc532
-rw-r--r--runtime/onert/core/src/util/EventRecorder.h63
-rw-r--r--runtime/onert/core/src/util/EventWriter.cc49
-rw-r--r--runtime/onert/core/src/util/EventWriter.h144
-rw-r--r--runtime/onert/core/src/util/Index.test.cc (renamed from runtime/onert/core/src/util/EnvConfigSource.cc)34
-rw-r--r--runtime/onert/core/src/util/MDTableEventWriter.cc365
-rw-r--r--runtime/onert/core/src/util/ObjectManager.test.cc211
-rw-r--r--runtime/onert/core/src/util/SNPEEventWriter.cc186
-rw-r--r--runtime/onert/core/src/util/ShapeInference.cc243
-rw-r--r--runtime/onert/core/src/util/ShapeInference.test.cc544
-rw-r--r--runtime/onert/core/src/util/TracingCtx.cc30
353 files changed, 20776 insertions, 12212 deletions
diff --git a/runtime/onert/core/src/backend/BackendContext.cc b/runtime/onert/core/src/backend/BackendContext.cc
index bafa36d28..7b36f106d 100644
--- a/runtime/onert/core/src/backend/BackendContext.cc
+++ b/runtime/onert/core/src/backend/BackendContext.cc
@@ -16,40 +16,10 @@
#include "backend/BackendContext.h"
-#include "ir/Operation.h"
-#include "backend/IConstantInitializer.h"
-
namespace onert
{
namespace backend
{
-void BackendContext::initialize(const std::vector<OperationInfo> &operation_list,
- const std::vector<ir::OperandIndex> &operand_list)
-{
- _operation_list = operation_list;
- _operand_list = operand_list;
-}
-
-void BackendContext::initConsts()
-{
- for (auto &op : _operation_list)
- {
- constant_initializer->setLayout(op.layout);
- _graph->operations().at(op.index).accept(*constant_initializer);
- }
-
- for (auto ind : _operand_list)
- {
- const auto &obj = _graph->operands().at(ind);
- if (obj.isConstant() && !constant_initializer->exist(ind))
- {
- constant_initializer->registerDefaultInitializer(ind, obj);
- }
- }
-
- constant_initializer->run();
-}
-
} // namespace backend
} // namespace onert
diff --git a/runtime/onert/core/src/backend/IConstantInitializer.cc b/runtime/onert/core/src/backend/IConstantInitializer.cc
deleted file mode 100644
index 934a42753..000000000
--- a/runtime/onert/core/src/backend/IConstantInitializer.cc
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "backend/IConstantInitializer.h"
-
-#include <Half.h>
-
-using float16 = Half;
-
-namespace onert
-{
-namespace backend
-{
-
-void IConstantInitializer::registerCopyInitializer(const ir::OperandIndex &index,
- const ir::Operand &obj)
-{
- // For only CONSTANTS
- // TODO Add to check if tensor has been allocated
- if (!obj.isConstant())
- return;
-
- const auto type = obj.typeInfo().type();
- using ir::DataType;
-
- switch (type)
- {
- case DataType::FLOAT32:
- _init_map[index] = copyInit<float>;
- break;
- case DataType::INT32:
- _init_map[index] = copyInit<int32_t>;
- break;
- case DataType::UINT32:
- _init_map[index] = copyInit<uint32_t>;
- break;
- case DataType::BOOL8:
- case DataType::QUANT_UINT8_ASYMM:
- _init_map[index] = copyInit<uint8_t>;
- break;
- case DataType::QUANT_INT8_SYMM:
- _init_map[index] = copyInit<int8_t>;
- break;
- case DataType::FLOAT16:
- _init_map[index] = copyInit<float16>;
- break;
- case DataType::INT64:
- _init_map[index] = copyInit<int64_t>;
- break;
- default:
- throw std::runtime_error("Not supported, yet");
- break;
- }
-}
-
-void IConstantInitializer::registerPermuteInitializer(const ir::OperandIndex &index,
- const ir::Operand &obj)
-{
- // For only CONSTANTS
- // TODO Add to check if tensor has been allocated
- if (!obj.isConstant())
- return;
-
- const auto type = obj.typeInfo().type();
- using ir::DataType;
- using namespace std::placeholders;
-
- switch (type)
- {
- case DataType::FLOAT32:
- _init_map[index] = std::bind(permuteInit<float>, _1, _2, _current_op_seq_layout);
- break;
- case DataType::INT32:
- _init_map[index] = std::bind(permuteInit<int32_t>, _1, _2, _current_op_seq_layout);
- break;
- case DataType::UINT32:
- _init_map[index] = std::bind(permuteInit<uint32_t>, _1, _2, _current_op_seq_layout);
- break;
- case DataType::BOOL8:
- case DataType::QUANT_UINT8_ASYMM:
- _init_map[index] = std::bind(permuteInit<uint8_t>, _1, _2, _current_op_seq_layout);
- break;
- case DataType::QUANT_INT8_SYMM:
- _init_map[index] = std::bind(permuteInit<int8_t>, _1, _2, _current_op_seq_layout);
- break;
- case DataType::FLOAT16:
- _init_map[index] = std::bind(permuteInit<float16>, _1, _2, _current_op_seq_layout);
- break;
- case DataType::INT64:
- _init_map[index] = std::bind(permuteInit<int64_t>, _1, _2, _current_op_seq_layout);
- break;
- default:
- throw std::runtime_error("Not supported, yet");
- break;
- }
-}
-
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/core/src/backend/controlflow/Tensor.h b/runtime/onert/core/src/backend/IPortableTensor.cc
index ba5bafd75..cec34e780 100644
--- a/runtime/onert/core/src/backend/controlflow/Tensor.h
+++ b/runtime/onert/core/src/backend/IPortableTensor.cc
@@ -14,22 +14,16 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_H__
-#define __ONERT_BACKEND_CONTROLFLOW_TENSOR_H__
-
-#include <backend/cpu_common/Tensor.h>
+#include "backend/IPortableTensor.h"
namespace onert
{
namespace backend
{
-namespace controlflow
-{
-using Tensor = cpu_common::Tensor;
+// `dynamic_cast` not working across library boundaries on NDK
+// With this as a key function, `dynamic_cast` works across dl
+IPortableTensor::~IPortableTensor() {}
-} // namespace controlflow
} // namespace backend
} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_TENSOR_H__
diff --git a/runtime/onert/core/src/backend/ITensor.cc b/runtime/onert/core/src/backend/ITensor.cc
index 7127ed93d..1339cb409 100644
--- a/runtime/onert/core/src/backend/ITensor.cc
+++ b/runtime/onert/core/src/backend/ITensor.cc
@@ -21,14 +21,9 @@ namespace onert
namespace backend
{
-ir::Shape ITensor::getShape() const
-{
- onert::ir::Shape shape(num_dimensions());
- for (uint32_t d = 0; d < num_dimensions(); d++)
- shape.dim(d) = dimension(d);
-
- return shape;
-}
+// `dynamic_cast` not working across library boundaries on NDK
+// With this as a key function, `dynamic_cast` works across dl
+ITensor::~ITensor() {}
} // namespace backend
} // namespace onert
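
Both hunks above replace removed helpers with an empty virtual destructor whose comment names the key-function idiom: giving a polymorphic class one out-of-line virtual member pins its vtable and type_info into a single shared object, so `dynamic_cast` keeps working when objects cross dlopen'ed backend libraries on the Android NDK. A minimal sketch of the idiom under hypothetical names (Base and Derived are not part of this patch):

// key_function_sketch.cc - illustrative only
struct Base
{
  // Declared here, defined out of line below; the translation unit that holds
  // the definition becomes the single home of Base's vtable and type_info.
  virtual ~Base();
  virtual void run() = 0;
};

struct Derived : Base
{
  void run() override {}
};

// Exactly one shared library provides this definition, so every other module
// compares against the same type_info instead of a per-library duplicate.
Base::~Base() = default;

int main()
{
  Derived d;
  Base *b = &d;
  return dynamic_cast<Derived *>(b) ? 0 : 1; // resolves via the single vtable/type_info
}
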
diff --git a/runtime/onert/core/src/backend/cpu_common/Allocator.cc b/runtime/onert/core/src/backend/basic/Allocator.cc
index 0ba444ee6..61214dfad 100644
--- a/runtime/onert/core/src/backend/cpu_common/Allocator.cc
+++ b/runtime/onert/core/src/backend/basic/Allocator.cc
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "backend/cpu_common/Allocator.h"
+#include "backend/basic/Allocator.h"
#include "util/logging.h"
@@ -22,7 +22,7 @@ namespace onert
{
namespace backend
{
-namespace cpu_common
+namespace basic
{
Allocator::Allocator(uint32_t capacity)
@@ -33,6 +33,6 @@ Allocator::Allocator(uint32_t capacity)
VERBOSE(ALLOC) << "base pointer: " << static_cast<void *>(_base.get()) << std::endl;
}
-} // namespace cpu_common
+} // namespace basic
} // namespace backend
} // namespace onert
diff --git a/runtime/onert/core/src/backend/basic/BackendContextHelpers.cc b/runtime/onert/core/src/backend/basic/BackendContextHelpers.cc
new file mode 100644
index 000000000..c02cc0cf2
--- /dev/null
+++ b/runtime/onert/core/src/backend/basic/BackendContextHelpers.cc
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/basic/BackendContextHelpers.h"
diff --git a/runtime/onert/core/src/backend/basic/DynamicTensorManager.cc b/runtime/onert/core/src/backend/basic/DynamicTensorManager.cc
new file mode 100644
index 000000000..07bcb09ee
--- /dev/null
+++ b/runtime/onert/core/src/backend/basic/DynamicTensorManager.cc
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/basic/DynamicTensorManager.h"
+
+#include "util/logging.h"
+#include "misc/polymorphic_downcast.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+DynamicTensorManager::DynamicTensorManager(const std::shared_ptr<TensorRegistry> &reg)
+ : _dynamic_mem_mgr{new DynamicMemoryManager()}, _tensors{reg}
+{
+ // DO NOTHING
+}
+
+void DynamicTensorManager::buildTensor(const ir::OperandIndex &ind,
+ const ir::OperandInfo &tensor_info,
+ ir::Layout backend_layout)
+{
+ assert(_tensors->getNativeTensor(ind) == nullptr);
+ auto tensor = std::make_unique<Tensor>(tensor_info, backend_layout, _dynamic_mem_mgr.get());
+ _tensors->setNativeTensor(ind, std::move(tensor));
+}
+
+const ITensor *DynamicTensorManager::getRawITensor(ir::OperandIndex ind)
+{
+ auto ptr = _tensors->getITensor(ind);
+ assert(ptr);
+ return ptr;
+}
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/core/src/backend/cpu_common/MemoryManager.cc b/runtime/onert/core/src/backend/basic/MemoryManager.cc
index 8cb9c22ca..05fd9cc77 100644
--- a/runtime/onert/core/src/backend/cpu_common/MemoryManager.cc
+++ b/runtime/onert/core/src/backend/basic/MemoryManager.cc
@@ -14,18 +14,19 @@
* limitations under the License.
*/
-#include <backend/cpu_common/MemoryManager.h>
+#include <backend/basic/MemoryManager.h>
#include <cassert>
#include "MemoryPlannerFactory.h"
#include "util/ConfigSource.h"
+#include "util/logging.h"
namespace onert
{
namespace backend
{
-namespace cpu_common
+namespace basic
{
MemoryManager::MemoryManager() : _mem_planner{createMemoryPlanner()}
@@ -34,20 +35,20 @@ MemoryManager::MemoryManager() : _mem_planner{createMemoryPlanner()}
}
MemoryManager::MemoryManager(const std::string planner_id)
- : _mem_planner{createMemoryPlanner(planner_id)}
+ : _mem_planner{createMemoryPlanner(planner_id)}
{
// DO NOTHING
}
-cpu_common::IMemoryPlanner *MemoryManager::createMemoryPlanner()
+basic::IMemoryPlanner *MemoryManager::createMemoryPlanner()
{
auto planner_id = util::getConfigString(util::config::CPU_MEMORY_PLANNER);
- return cpu_common::MemoryPlannerFactory::get().create(planner_id);
+ return basic::MemoryPlannerFactory::get().create(planner_id);
}
-cpu_common::IMemoryPlanner *MemoryManager::createMemoryPlanner(const std::string planner_id)
+basic::IMemoryPlanner *MemoryManager::createMemoryPlanner(const std::string planner_id)
{
- return cpu_common::MemoryPlannerFactory::get().create(planner_id);
+ return basic::MemoryPlannerFactory::get().create(planner_id);
}
void MemoryManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
@@ -59,7 +60,7 @@ void MemoryManager::releasePlan(const ir::OperandIndex &ind) { _mem_planner->rel
void MemoryManager::allocate(void)
{
- _mem_alloc = std::make_shared<cpu_common::Allocator>(_mem_planner->capacity());
+ _mem_alloc = std::make_shared<basic::Allocator>(_mem_planner->capacity());
assert(_mem_alloc->base());
}
@@ -70,20 +71,20 @@ uint8_t *MemoryManager::getBuffer(const ir::OperandIndex &ind) const
return _mem_alloc->base() + mem_blk.offset;
}
-std::shared_ptr<cpu_common::Allocator> DynamicMemoryManager::allocate(const ir::OperandIndex &ind,
- uint32_t capacity)
+std::shared_ptr<basic::Allocator> DynamicMemoryManager::allocate(const ITensor *tensor,
+ uint32_t capacity)
{
- auto find = _mem_alloc_map.find(ind);
+ auto find = _mem_alloc_map.find(tensor);
if (find != _mem_alloc_map.end())
throw std::runtime_error("Cannot allocate memory for a tensor. It was already allocated.");
- _mem_alloc_map[ind] = std::make_shared<cpu_common::Allocator>(capacity);
- return _mem_alloc_map[ind];
+ _mem_alloc_map[tensor] = std::make_shared<basic::Allocator>(capacity);
+ return _mem_alloc_map[tensor];
}
-void DynamicMemoryManager::deallocate(const ir::OperandIndex &ind)
+void DynamicMemoryManager::deallocate(const ITensor *tensor)
{
- auto find = _mem_alloc_map.find(ind);
+ auto find = _mem_alloc_map.find(tensor);
if (find == _mem_alloc_map.end())
throw std::runtime_error("Cannot find Allocator for the requested index");
@@ -93,7 +94,7 @@ void DynamicMemoryManager::deallocate(const ir::OperandIndex &ind)
void DynamicMemoryManager::deallocate(void)
{
- for (auto &mem_alloc : _mem_alloc_map)
+ for (auto &&mem_alloc : _mem_alloc_map)
{
// Release memory buffer of mem_alloc
mem_alloc.second->release();
@@ -102,6 +103,6 @@ void DynamicMemoryManager::deallocate(void)
_mem_alloc_map.clear();
}
-} // namespace cpu_common
+} // namespace basic
} // namespace backend
} // namespace onert
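
The DynamicMemoryManager hunks above change its map key from ir::OperandIndex to the tensor pointer itself, so a dynamically resized tensor can own its allocation without needing a registered operand index. A sketch of the resulting call pattern; the free function, its arguments, and the `had_buffer` flag are illustrative, only the allocate/deallocate signatures come from the hunks above:

#include <backend/basic/MemoryManager.h>

#include <cstdint>
#include <memory>

namespace basic = onert::backend::basic;

// Re-allocate the buffer behind `tensor` when its size changes; this mirrors
// what basic::Tensor::applyShape does with its DynamicMemoryManager.
std::shared_ptr<basic::Allocator> regrow(basic::DynamicMemoryManager &mgr,
                                         const onert::backend::ITensor *tensor,
                                         bool had_buffer, uint32_t new_size)
{
  if (had_buffer)
    mgr.deallocate(tensor); // keyed by tensor pointer; throws if nothing was allocated
  return mgr.allocate(tensor, new_size); // shared_ptr<basic::Allocator> backing the tensor
}
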
diff --git a/runtime/onert/core/src/backend/cpu_common/MemoryPlanner.cc b/runtime/onert/core/src/backend/basic/MemoryPlanner.cc
index 75c2da7d2..1c048043c 100644
--- a/runtime/onert/core/src/backend/cpu_common/MemoryPlanner.cc
+++ b/runtime/onert/core/src/backend/basic/MemoryPlanner.cc
@@ -22,24 +22,21 @@ namespace onert
{
namespace backend
{
-namespace cpu_common
+namespace basic
{
void BumpPlanner::claim(const ir::OperandIndex &ind, size_t size)
{
- assert(size != 0);
-
Block blk{_capacity, size};
_mem_plans[ind] = blk;
_capacity += size;
- VERBOSE(BP_PLANNER) << "CLAIM(#" << ind.value() << "): " << blk.offset << ", " << blk.size
- << std::endl;
+ VERBOSE(BP_PLANNER) << "CLAIM(" << ind << "): " << blk.offset << ", " << blk.size << std::endl;
}
void BumpPlanner::release(const ir::OperandIndex &ind)
{
- VERBOSE(BP_PLANNER) << "RELEASE(#" << ind.value() << "): "
+ VERBOSE(BP_PLANNER) << "RELEASE(" << ind << "): "
<< "NOTHING does" << std::endl;
}
@@ -59,11 +56,9 @@ void BumpPlanner::release(const ir::OperandIndex &ind)
// the previous claim_base_offset.
void FirstFitPlanner::claim(const ir::OperandIndex &ind, size_t size)
{
- assert(size != 0);
-
// Find the right position for claiming
uint32_t next_offset = 0;
- for (auto &mem_claim : _claim_table)
+ for (const auto &mem_claim : _claim_table)
{
auto claimed_base_offset = mem_claim.first;
auto claimed_size = _mem_plans[mem_claim.second].size;
@@ -81,7 +76,7 @@ void FirstFitPlanner::claim(const ir::OperandIndex &ind, size_t size)
_claim_table[next_offset] = ind;
_mem_plans[ind] = {next_offset, size};
- VERBOSE(FF_PLANNER) << "claim(#" << ind.value() << "): [+" << next_offset << ", " << size << "sz]"
+ VERBOSE(FF_PLANNER) << "claim(" << ind << "): [+" << next_offset << ", " << size << "sz]"
<< std::endl;
if (_capacity < next_offset + size)
@@ -102,7 +97,7 @@ void FirstFitPlanner::release(const ir::OperandIndex &ind)
_claim_table.erase(it);
- VERBOSE(FF_PLANNER) << "release(#" << index << "): [+" << offset << ", " << size << "sz]"
+ VERBOSE(FF_PLANNER) << "release(" << index << "): [+" << offset << ", " << size << "sz]"
<< std::endl;
return;
}
@@ -111,16 +106,14 @@ void FirstFitPlanner::release(const ir::OperandIndex &ind)
}
WICPlanner::WICPlanner()
- : _initialized(false), _capacity(0), _mem_plans(), _live_operands(), _interference_graph(),
- _operands()
+ : _initialized(false), _capacity(0), _mem_plans(), _live_operands(), _interference_graph(),
+ _operands()
{
// DO NOTHING
}
void WICPlanner::claim(const ir::OperandIndex &ind, size_t size)
{
- assert(size != 0);
-
_operands.emplace(size, ind);
_interference_graph[ind].insert(_interference_graph[ind].end(), _live_operands.cbegin(),
_live_operands.cend());
@@ -130,13 +123,13 @@ void WICPlanner::claim(const ir::OperandIndex &ind, size_t size)
}
_live_operands.emplace(ind);
- VERBOSE(WIC_PLANNER) << "claim(#" << ind.value() << "): [" << size << "sz]" << std::endl;
+ VERBOSE(WIC_PLANNER) << "claim(" << ind << "): [" << size << "sz]" << std::endl;
}
void WICPlanner::release(const ir::OperandIndex &ind)
{
_live_operands.erase(ind);
- VERBOSE(WIC_PLANNER) << "release(#" << ind.value() << ")" << std::endl;
+ VERBOSE(WIC_PLANNER) << "release(" << ind << ")" << std::endl;
}
/*
@@ -154,7 +147,7 @@ void WICPlanner::buildMemoryPlans()
{
uint32_t size = operand.first;
const ir::OperandIndex &ind = operand.second;
- VERBOSE(WIC_PLANNER) << "build_plan(#" << ind.value() << "): [" << size << "sz]" << std::endl;
+ VERBOSE(WIC_PLANNER) << "build_plan(" << ind << "): [" << size << "sz]" << std::endl;
uint32_t next_offset = 0;
if (_interference_graph.count(ind))
@@ -190,8 +183,8 @@ void WICPlanner::buildMemoryPlans()
}
_mem_plans[ind] = {next_offset, size};
- VERBOSE(WIC_PLANNER) << "alloc(#" << ind.value() << "): [+" << next_offset << ", " << size
- << "sz]" << std::endl;
+ VERBOSE(WIC_PLANNER) << "alloc(" << ind << "): [+" << next_offset << ", " << size << "sz]"
+ << std::endl;
if (_capacity < next_offset + size)
{
@@ -210,6 +203,6 @@ WICPlanner::MemoryPlans &WICPlanner::memory_plans()
return _mem_plans;
}
-} // namespace cpu_common
+} // namespace basic
} // namespace backend
} // namespace onert
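
The planners above share a small claim/release protocol: claim() reserves `size` bytes for an operand while it is live, release() ends its lifetime, and capacity() grows to the peak of the resulting layout (FirstFitPlanner additionally reuses freed gaps). A sketch that drives FirstFitPlanner directly, in the style of the MemoryPlanner.test.cc hunks below; the offsets in the comments are what a first-fit layout produces, not values taken from this patch:

#include "MemoryPlanner.h" // in-tree private header, as the unit test uses

#include <cassert>

int main()
{
  onert::backend::basic::FirstFitPlanner planner;

  planner.claim(onert::ir::OperandIndex{0}, 1024); // laid out at offset 0
  planner.claim(onert::ir::OperandIndex{1}, 512);  // laid out at offset 1024
  planner.release(onert::ir::OperandIndex{0});     // bytes 0..1023 become a gap
  planner.claim(onert::ir::OperandIndex{2}, 256);  // first fit places this back at offset 0

  // Peak usage was 1024 + 512 bytes, so the arena MemoryManager::allocate()
  // requests via planner.capacity() is 1536 bytes.
  assert(planner.capacity() == 1536);
  return 0;
}
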
diff --git a/runtime/onert/core/src/backend/cpu_common/MemoryPlanner.h b/runtime/onert/core/src/backend/basic/MemoryPlanner.h
index 7c387e542..661d0b5d9 100644
--- a/runtime/onert/core/src/backend/cpu_common/MemoryPlanner.h
+++ b/runtime/onert/core/src/backend/basic/MemoryPlanner.h
@@ -19,23 +19,23 @@
* @brief       This file contains Memory Planning related classes
*/
-#ifndef __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_H__
-#define __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_H__
+#ifndef __ONERT_BACKEND_BASIC_MEMORY_PLANNER_H__
+#define __ONERT_BACKEND_BASIC_MEMORY_PLANNER_H__
#include <map>
#include <vector>
#include <unordered_set>
#include <memory>
-#include "backend/cpu_common/Allocator.h"
-#include "backend/cpu_common/IMemoryPlanner.h"
+#include "backend/basic/Allocator.h"
+#include "backend/basic/IMemoryPlanner.h"
#include "ir/OperandIndexMap.h"
namespace onert
{
namespace backend
{
-namespace cpu_common
+namespace basic
{
/**
@@ -153,8 +153,8 @@ private:
std::multimap<uint32_t, ir::OperandIndex, std::greater<uint32_t>> _operands;
};
-} // namespace cpu_common
+} // namespace basic
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_H__
+#endif // __ONERT_BACKEND_BASIC_MEMORY_PLANNER_H__
diff --git a/runtime/onert/core/src/backend/cpu_common/MemoryPlanner.test.cc b/runtime/onert/core/src/backend/basic/MemoryPlanner.test.cc
index 5208a94d4..a32228cbe 100644
--- a/runtime/onert/core/src/backend/cpu_common/MemoryPlanner.test.cc
+++ b/runtime/onert/core/src/backend/basic/MemoryPlanner.test.cc
@@ -21,13 +21,13 @@
TEST(Allocator, allocate_test)
{
- ::onert::backend::cpu_common::Allocator allocator(1024);
+ ::onert::backend::basic::Allocator allocator(1024);
ASSERT_NE(allocator.base(), nullptr);
}
TEST(BumpPlanner, claim_test)
{
- ::onert::backend::cpu_common::BumpPlanner planner;
+ ::onert::backend::basic::BumpPlanner planner;
auto claim = [&planner](uint32_t index, size_t size, uint32_t expected_offset) {
onert::ir::OperandIndex mem_idx(index);
@@ -44,7 +44,7 @@ TEST(BumpPlanner, claim_test)
TEST(FirstFitPlanner, claim_release_test)
{
- ::onert::backend::cpu_common::FirstFitPlanner planner;
+ ::onert::backend::basic::FirstFitPlanner planner;
auto claim = [&planner](uint32_t index, size_t size, uint32_t expected_offset) {
onert::ir::OperandIndex mem_idx(index);
@@ -128,7 +128,7 @@ TEST(FirstFitPlanner, claim_release_test)
TEST(WICPlanner, claim_release_test)
{
- ::onert::backend::cpu_common::WICPlanner planner;
+ ::onert::backend::basic::WICPlanner planner;
auto claim = [&planner](uint32_t index, size_t size) {
onert::ir::OperandIndex mem_idx(index);
diff --git a/runtime/onert/core/src/backend/cpu_common/MemoryPlannerFactory.cc b/runtime/onert/core/src/backend/basic/MemoryPlannerFactory.cc
index ead4f3294..e12635359 100644
--- a/runtime/onert/core/src/backend/cpu_common/MemoryPlannerFactory.cc
+++ b/runtime/onert/core/src/backend/basic/MemoryPlannerFactory.cc
@@ -22,7 +22,7 @@ namespace onert
{
namespace backend
{
-namespace cpu_common
+namespace basic
{
MemoryPlannerFactory &MemoryPlannerFactory::get()
@@ -48,6 +48,6 @@ IMemoryPlanner *MemoryPlannerFactory::create(const std::string &key)
return new FirstFitPlanner; // Default Planner
}
-} // namespace cpu_common
+} // namespace basic
} // namespace backend
} // namespace onert
diff --git a/runtime/onert/core/src/backend/cpu_common/MemoryPlannerFactory.h b/runtime/onert/core/src/backend/basic/MemoryPlannerFactory.h
index d14ec13ca..fe32f4c99 100644
--- a/runtime/onert/core/src/backend/cpu_common/MemoryPlannerFactory.h
+++ b/runtime/onert/core/src/backend/basic/MemoryPlannerFactory.h
@@ -14,10 +14,10 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_FACTORY_H__
-#define __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_FACTORY_H__
+#ifndef __ONERT_BACKEND_BASIC_MEMORY_PLANNER_FACTORY_H__
+#define __ONERT_BACKEND_BASIC_MEMORY_PLANNER_FACTORY_H__
-#include "backend/cpu_common/IMemoryPlanner.h"
+#include "backend/basic/IMemoryPlanner.h"
#include <string>
@@ -25,7 +25,7 @@ namespace onert
{
namespace backend
{
-namespace cpu_common
+namespace basic
{
class MemoryPlannerFactory
@@ -40,8 +40,8 @@ public:
IMemoryPlanner *create(const std::string &key);
};
-} // namespace cpu_common
+} // namespace basic
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CPU_COMMON_MEMORY_PLANNER_FACTORY_H__
+#endif // __ONERT_BACKEND_BASIC_MEMORY_PLANNER_FACTORY_H__
diff --git a/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc b/runtime/onert/core/src/backend/basic/StaticTensorManager.cc
index 440f70c93..71cde4cde 100644
--- a/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc
+++ b/runtime/onert/core/src/backend/basic/StaticTensorManager.cc
@@ -14,65 +14,55 @@
* limitations under the License.
*/
-#include "backend/cpu_common/StaticTensorManager.h"
+#include "backend/basic/StaticTensorManager.h"
-#include "backend/cpu_common/DynamicTensorManager.h"
+#include "backend/basic/DynamicTensorManager.h"
+#include "backend/basic/Tensor.h"
#include <util/logging.h>
namespace onert
{
namespace backend
{
-namespace cpu_common
+namespace basic
{
StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
- IDynamicTensorManager *dynamic_tensor_manager)
- : _const_mgr{new DynamicMemoryManager()}, _nonconst_mgr{new MemoryManager()}, _tensors{reg},
- _dynamic_tensor_manager{dynamic_tensor_manager}
+ DynamicTensorManager *dynamic_tensor_manager)
+ : _nonconst_mgr{new MemoryManager()}, _tensors{reg}, _dynamic_tensor_manager{
+ dynamic_tensor_manager}
{
// DO NOTHING
}
-void StaticTensorManager::allocateConsts(void)
+StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
+ const std::string planner_id,
+ DynamicTensorManager *dynamic_tensor_manager)
+ : _nonconst_mgr{new MemoryManager(planner_id)}, _tensors{reg}, _dynamic_tensor_manager{
+ dynamic_tensor_manager}
{
- for (auto &pair : _tensors->native_tensors())
- {
- const auto &ind = pair.first;
- auto tensor = pair.second;
- if (_as_constants[ind])
- {
- auto mem_alloc = _const_mgr->allocate(ind, tensor->total_size());
- tensor->setBuffer(mem_alloc);
- auto buffer = mem_alloc->base();
- VERBOSE(CPU_COMMON_StaticTensorManager) << "CONSTANT TENSOR(#" << ind.value()
- << "): " << static_cast<void *>(buffer)
- << "size : " << tensor->total_size() << std::endl;
- }
- }
+ // DO NOTHING
}
void StaticTensorManager::allocateNonconsts(void)
{
_nonconst_mgr->allocate();
- for (auto &pair : _tensors->native_tensors())
+ for (auto &&pair : _tensors->native_tensors())
{
const auto &ind = pair.first;
- auto tensor = pair.second;
+ auto tensor = pair.second.get();
if (!_as_constants[ind] && !tensor->is_dynamic())
{
auto *buffer = _nonconst_mgr->getBuffer(ind);
tensor->setBuffer(buffer);
- VERBOSE(CPU_COMMON_StaticTensorManager) << "TENSOR(#" << ind.value()
- << "): " << static_cast<void *>(buffer) << std::endl;
+ VERBOSE(CPU_StaticTensorManager)
+ << "TENSOR " << ind << " : " << static_cast<void *>(buffer) << std::endl;
}
}
}
-void StaticTensorManager::deallocateConsts(void) { _const_mgr->deallocate(); }
-
void StaticTensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); }
void StaticTensorManager::buildTensor(const ir::OperandIndex &ind,
@@ -80,8 +70,17 @@ void StaticTensorManager::buildTensor(const ir::OperandIndex &ind,
bool as_const)
{
assert(!_tensors->getNativeTensor(ind));
- auto tensor = std::make_shared<Tensor>(tensor_info, backend_layout, _dynamic_tensor_manager);
- _tensors->setNativeTensor(ind, tensor);
+ if (as_const)
+ {
+ auto tensor = std::make_unique<ExternalTensor>(tensor_info, backend_layout);
+ _tensors->setNativeTensor(ind, std::move(tensor));
+ }
+ else
+ {
+ auto tensor = std::make_unique<Tensor>(tensor_info, backend_layout,
+ _dynamic_tensor_manager->dynamic_mem_mgr().get());
+ _tensors->setNativeTensor(ind, std::move(tensor));
+ }
_as_constants[ind] = as_const;
}
@@ -113,6 +112,6 @@ void StaticTensorManager::iterate(const std::function<void(const ir::OperandInde
fn(it.first);
}
-} // namespace cpu_common
+} // namespace basic
} // namespace backend
} // namespace onert
diff --git a/runtime/onert/core/src/backend/basic/Tensor.cc b/runtime/onert/core/src/backend/basic/Tensor.cc
new file mode 100644
index 000000000..de1cff4f4
--- /dev/null
+++ b/runtime/onert/core/src/backend/basic/Tensor.cc
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/basic/Tensor.h"
+
+#include "ir/DataType.h"
+#include "backend/basic/MemoryManager.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+Tensor::~Tensor() {}
+
+size_t Tensor::calcOffset(const ir::Coordinates &coords) const
+{
+ auto shape = getShape();
+ size_t rank = shape.rank();
+ rank = rank == 0 ? 1 : rank;
+ size_t offset = 0;
+ for (size_t i = 0; i < rank; ++i)
+ {
+ auto dim = shape.rank() == 0 ? 1 : shape.dim(i);
+ offset = offset * dim + coords[i];
+ }
+ offset *= sizeOfDataType(data_type());
+ return offset;
+}
+
+void Tensor::setShape(const ir::Shape &new_shape) { _info.shape(new_shape); }
+
+bool Tensor::applyShape(const ir::Shape &new_shape)
+{
+ bool previously_dynamic = is_dynamic();
+
+ auto allocTensorMem = [&]() {
+ auto capacity = total_size();
+ assert(_dynamic_mem_mgr);
+ auto alloc = _dynamic_mem_mgr->allocate(this, capacity);
+ setBuffer(alloc);
+ };
+
+ if (!previously_dynamic || buffer() == nullptr)
+ {
+ // Always set shape - when buffer with same size was already allocated, shape could differ
+ setShape(new_shape);
+ set_dynamic();
+ allocTensorMem();
+ }
+ else
+ {
+ auto previous_size = total_size();
+ auto new_size = new_shape.num_elements() * ir::sizeOfDataType(data_type());
+ if (previous_size != new_size)
+ {
+ assert(_dynamic_mem_mgr);
+ _dynamic_mem_mgr->deallocate(this);
+
+ setShape(new_shape);
+ set_dynamic();
+ allocTensorMem();
+ }
+ else
+ { // when buffer with same size was already allocated, shape could differ
+ setShape(new_shape);
+ }
+ }
+ return true;
+}
+
+ir::Shape Tensor::getShape() const { return _info.shape(); }
+
+void Tensor::deallocBuffer()
+{
+ if (_allocator)
+ {
+ _buffer = nullptr;
+ _allocator.reset();
+ if (_dynamic_mem_mgr)
+ {
+ _dynamic_mem_mgr->deallocate(this);
+ }
+ }
+}
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
+
+// ExternalTensor
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+// `dynamic_cast` not working across library boundaries on NDK
+// With this as a key function, `dynamic_cast` works across dl
+ExternalTensor::~ExternalTensor() {}
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
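
Tensor::calcOffset above linearizes coordinates in row-major order with the recurrence offset = offset * dim(i) + coords[i], then scales by the element size. A self-contained check of that arithmetic; the free function and the example shape are illustrative, not part of the patch:

#include <cassert>
#include <cstddef>
#include <vector>

std::size_t row_major_byte_offset(const std::vector<int> &dims, const std::vector<int> &coords,
                                  std::size_t elem_size)
{
  std::size_t offset = 0;
  for (std::size_t i = 0; i < dims.size(); ++i)
    offset = offset * dims[i] + coords[i]; // same recurrence as calcOffset's loop
  return offset * elem_size;
}

int main()
{
  // shape {2, 3, 4}, coords {1, 2, 3}, FLOAT32 (4 bytes):
  // ((1 * 3 + 2) * 4 + 3) = 23 elements -> 92 bytes
  assert(row_major_byte_offset({2, 3, 4}, {1, 2, 3}, 4) == 92);
  return 0;
}
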
diff --git a/runtime/onert/core/src/backend/basic/TensorBuilder.cc b/runtime/onert/core/src/backend/basic/TensorBuilder.cc
new file mode 100644
index 000000000..4912af1f5
--- /dev/null
+++ b/runtime/onert/core/src/backend/basic/TensorBuilder.cc
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <backend/basic/TensorBuilder.h>
+
+#include <util/logging.h>
+
+#include <cassert>
+
+namespace onert
+{
+namespace backend
+{
+namespace basic
+{
+
+TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg)
+ : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)},
+ _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())}
+{
+ /* empty */
+}
+
+TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg,
+ const std::string planner_id)
+ : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)},
+ _static_tensor_mgr{new StaticTensorManager(_tensor_reg, planner_id, _dynamic_tensor_mgr.get())}
+{
+ /* empty */
+}
+
+void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+ ir::Layout layout)
+{
+ _tensor_info_map.emplace(ind, info);
+
+ // CPU backend supports only one layout as NHWC
+ assert(layout == ir::Layout::NHWC);
+ if (info.isDynamic())
+ {
+ _dynamic_tensor_mgr->buildTensor(ind, info, layout);
+ }
+ else
+ {
+ _static_tensor_mgr->buildTensor(ind, info, layout, info.isConstant());
+ }
+}
+
+void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
+{
+ assert(_tensor_info_map.find(ind) != _tensor_info_map.end());
+ const auto &tensor_info = _tensor_info_map.at(ind);
+
+ if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
+ {
+ const auto size = tensor_info.total_size();
+ _static_tensor_mgr->claimPlan(ind, size);
+ }
+}
+
+void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
+{
+ if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
+ {
+ _static_tensor_mgr->releasePlan(ind);
+ }
+}
+
+bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
+{
+ return _tensor_info_map.find(ind) != _tensor_info_map.end();
+}
+
+void TensorBuilder::allocate(void) { _static_tensor_mgr->allocateNonconsts(); }
+
+} // namespace basic
+} // namespace backend
+} // namespace onert
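
The new TensorBuilder above splits each registered operand between the dynamic and static tensor managers and only plans memory for the static ones: registerTensorInfo() records the operand, notifyFirstUse()/notifyLastUse() bracket its liveness so the static manager can claim and release a plan, and allocate() materializes the non-constant static buffers. A sketch of that call sequence; the driver function and the prepared OperandInfo are assumptions, while the TensorBuilder calls are the ones defined above:

#include <backend/basic/TensorBuilder.h>

using namespace onert;

// Plan and allocate one static operand; `info` is assumed to be a prepared,
// non-constant, non-dynamic ir::OperandInfo. Real code interleaves these calls
// for all operands in liveness order and calls allocate() once at the end.
void plan_one_operand(backend::basic::TensorBuilder &builder, const ir::OperandIndex &ind,
                      const ir::OperandInfo &info)
{
  builder.registerTensorInfo(ind, info, ir::Layout::NHWC); // only NHWC is accepted here
  builder.notifyFirstUse(ind); // static, non-dynamic tensors get a memory plan claimed
  builder.notifyLastUse(ind);  // ...and released once the operand is dead
  builder.allocate();          // materializes buffers for all planned static tensors
}
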
diff --git a/runtime/onert/core/src/backend/cpu_common/Tensor.cc b/runtime/onert/core/src/backend/basic/train/TrainableTensor.cc
index f34564dd9..d09604224 100644
--- a/runtime/onert/core/src/backend/cpu_common/Tensor.cc
+++ b/runtime/onert/core/src/backend/basic/train/TrainableTensor.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,30 +14,36 @@
* limitations under the License.
*/
-#include "backend/cpu_common/Tensor.h"
+#include <backend/basic/train/TrainableTensor.h>
namespace onert
{
namespace backend
{
-namespace cpu_common
+namespace basic
+{
+namespace train
{
-size_t Tensor::calcOffset(const ir::Coordinates &coords) const
+std::vector<ITensor *> TrainableTensor::optVars()
{
- size_t rank = num_dimensions();
- rank = rank == 0 ? 1 : rank;
- size_t offset = 0;
- for (size_t i = 0; i < rank; ++i)
+ std::vector<ITensor *> ret;
+ for (auto &&e : _opt_vars)
{
- offset = offset * dimension(i) + coords[i];
+ ret.emplace_back(e.get());
}
- offset *= sizeOfDataType(data_type());
- return offset;
+ return ret;
}
-void Tensor::setShape(const ir::Shape &new_shape) { _info.shape(new_shape); }
+void TrainableTensor::fillBuffer(const std::shared_ptr<ir::Data> &data)
+{
+ auto *buffer = _tensor.buffer();
+ assert(buffer);
+ assert(total_size() == data->size());
+ std::memcpy(buffer, data->base(), data->size());
+}
-} // namespace cpu_common
+} // namespace train
+} // namespace basic
} // namespace backend
} // namespace onert
diff --git a/runtime/onert/core/src/backend/controlflow/Backend.h b/runtime/onert/core/src/backend/builtin/Backend.h
index 670f7750f..c05494a6a 100644
--- a/runtime/onert/core/src/backend/controlflow/Backend.h
+++ b/runtime/onert/core/src/backend/builtin/Backend.h
@@ -14,16 +14,24 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CONTROLFLOW_BACKEND_H__
-#define __ONERT_BACKEND_CONTROLFLOW_BACKEND_H__
+#ifndef __ONERT_BACKEND_BUILTIN_BACKEND_H__
+#define __ONERT_BACKEND_BUILTIN_BACKEND_H__
+#include "BackendContext.h"
#include "Config.h"
-#include "ConstantInitializer.h"
#include "KernelGenerator.h"
#include "TensorBuilder.h"
#include "Tensor.h"
+#ifdef ONERT_TRAIN
+#include "train/BackendContext.h"
+#include "train/KernelGenerator.h"
+#include "train/TensorRegistry.h"
+#endif // ONERT_TRAIN
#include <backend/Backend.h>
+#ifdef ONERT_TRAIN
+#include <backend/train/ITrainableBackend.h>
+#endif // ONERT_TRAIN
#include <memory>
@@ -31,22 +39,23 @@ namespace onert
{
namespace backend
{
-namespace controlflow
+namespace builtin
{
class Backend : public ::onert::backend::Backend
+#ifdef ONERT_TRAIN
+ ,
+ public backend::train::ITrainableBackend
+#endif // ONERT_TRAIN
{
public:
Backend() : _config{std::make_shared<Config>()} {}
std::shared_ptr<IConfig> config() const override { return _config; }
- std::unique_ptr<BackendContext> newContext(const ir::Graph &graph,
- const std::shared_ptr<custom::IKernelBuilder> &,
- bool) const override
+ std::unique_ptr<onert::backend::BackendContext> newContext(ContextData &&data) const override
{
- const auto &operands = graph.operands();
- auto context = std::make_unique<BackendContext>(this, &graph);
+ auto context = std::make_unique<BackendContext>(this, std::move(data));
    // The ControlFlow backend may not build tensors for itself because the backend's operations use
    // tensors of other backends instead
    // But the backend still builds tensors in case the controlflow operation may have constant
@@ -68,19 +77,33 @@ public:
auto tb = std::make_shared<TensorBuilder>(tr);
context->tensor_registry = tr;
context->tensor_builder = tb;
- context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
- context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb->dynamicTensorManager(), tr);
- context->tensor_register = nullptr;
- context->optimizer = nullptr;
+ context->kernel_gen = std::make_shared<KernelGenerator>(
+ *context->graph(), tb->dynamicTensorManager(), tr, context->external_context());
return context;
}
+#ifdef ONERT_TRAIN
+ std::unique_ptr<backend::train::TrainableBackendContext>
+ newContext(backend::train::TrainableContextData &&tdata) const override
+ {
+ const auto &tgraph = *tdata.tgraph;
+ auto tr = std::make_shared<train::TensorRegistry>();
+ // TODO Create TensorBuilder if necessary
+ auto tdata_ptr = std::make_unique<backend::train::TrainableContextData>(std::move(tdata));
+ auto context = std::make_unique<train::BackendContext>(this, std::move(tdata_ptr), tr);
+
+ context->kernel_gen =
+ std::make_shared<train::KernelGenerator>(tgraph, tr, context->external_context());
+ return context;
+ }
+#endif // ONERT_TRAIN
+
private:
std::shared_ptr<IConfig> _config;
};
-} // namespace controlflow
+} // namespace builtin
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CONTROLFLOW_BACKEND_H__
+#endif // __ONERT_BACKEND_BUILTIN_BACKEND_H__
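
With this change the backend no longer receives a graph and kernel builder directly; everything comes in through a single `ContextData` bundle. A hedged sketch of what a caller might do, where `makeContextData()` stands in for the compiler-side code that assembles that bundle (it is not part of this patch):

// Sketch only: obtaining and using a builtin backend context.
// makeContextData() is a hypothetical placeholder for the compiler-side setup.
onert::backend::builtin::Backend backend;
onert::backend::ContextData data = makeContextData();
auto context = backend.newContext(std::move(data));
auto *tensor_registry = context->genTensors(); // plan and allocate static tensors
auto function_map = context->genKernels();     // one FunctionSequence per operation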
diff --git a/runtime/onert/core/src/backend/builtin/BackendContext.cc b/runtime/onert/core/src/backend/builtin/BackendContext.cc
new file mode 100644
index 000000000..573617e28
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/BackendContext.cc
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "KernelGenerator.h"
+#include "backend/basic/BackendContextHelpers.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+ITensorRegistry *BackendContext::genTensors() { return basic::genTensors(*this); }
+
+FunctionMap BackendContext::genKernels()
+{
+ FunctionMap ret;
+
+ for (auto &&op_ind : _data.op_order)
+ {
+ auto fn_seq = kernel_gen->generate(op_ind);
+ ret.emplace_back(op_ind, std::move(fn_seq));
+ }
+
+ basic::initConsts(*this);
+
+ // NOTE For memory optimization, we want to free some operand data
+ const_cast<ir::Graph *>(graph())->operands().iterate(
+ [&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
+
+ for (auto &&it : ret)
+ {
+ auto &fn_seq = it.second;
+ fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
+ }
+
+ return ret;
+}
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
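
`genKernels()` returns the kernels as a sequence of (operation index, function sequence) pairs, already prepared and with constant operand data released. A small sketch of how such a FunctionMap can be consumed, assuming `context` is a built `builtin::BackendContext`:

// Sketch only: iterating the FunctionMap produced above. The real executor wiring in onert
// is more involved; this just shows the container's shape.
auto fn_map = context->genKernels();
for (auto &&pair : fn_map)
{
  const auto &op_index = pair.first; // ir::OperationIndex the kernels belong to
  auto &fn_seq = pair.second;        // std::unique_ptr<exec::FunctionSequence>
  (void)op_index;
  fn_seq->run(); // runs every generated kernel for this operation in order
}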
diff --git a/runtime/onert/core/src/backend/builtin/BackendContext.h b/runtime/onert/core/src/backend/builtin/BackendContext.h
new file mode 100644
index 000000000..93e825239
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/BackendContext.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_BUILTIN_BACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "ExternalContext.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+class BackendContext : public onert::backend::BackendContext
+{
+public:
+ BackendContext(const Backend *backend, ContextData &&data,
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+ std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+ : onert::backend::BackendContext(backend, std::move(data), tensor_registry),
+ tensor_builder{tensor_builder}, kernel_gen{kernel_gen},
+ _external_context(std::make_shared<ExternalContext>())
+ {
+ }
+
+ ITensorRegistry *genTensors() override;
+
+ FunctionMap genKernels() override;
+
+ std::shared_ptr<ExternalContext> external_context() { return _external_context; }
+
+private:
+ void planTensors(const std::vector<onert::ir::OperationIndex> &order,
+ const compiler::GraphLowerInfo &lower_info);
+
+public:
+ // TODO Make it private
+ std::shared_ptr<TensorBuilder> tensor_builder;
+ std::shared_ptr<KernelGenerator> kernel_gen;
+
+private:
+  // NOTE The ruy context owns a thread pool, so when multiple ruy contexts are created,
+  // their thread pools are duplicated as well
+ // TODO Create one ruy context for session
+ std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_BACKEND_CONTEXT_H__
diff --git a/runtime/onert/core/src/backend/controlflow/Config.cc b/runtime/onert/core/src/backend/builtin/Config.cc
index 5ec01fe11..e5f6d4c21 100644
--- a/runtime/onert/core/src/backend/controlflow/Config.cc
+++ b/runtime/onert/core/src/backend/builtin/Config.cc
@@ -20,18 +20,18 @@ namespace onert
{
namespace backend
{
-namespace controlflow
+namespace builtin
{
-std::string Config::ID = "controlflow";
+std::string Config::ID = "builtin";
bool Config::initialize() { return true; }
-ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout frontend_layout)
+ir::Layout Config::supportLayout(const ir::IOperation &, ir::Layout frontend_layout)
{
return frontend_layout;
}
-} // namespace controlflow
+} // namespace builtin
} // namespace backend
} // namespace onert
diff --git a/runtime/onert/core/src/backend/controlflow/Config.h b/runtime/onert/core/src/backend/builtin/Config.h
index 6645ed59d..196b299d3 100644
--- a/runtime/onert/core/src/backend/controlflow/Config.h
+++ b/runtime/onert/core/src/backend/builtin/Config.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CONTROLFLOW_CONFIG_H__
-#define __ONERT_BACKEND_CONTROLFLOW_CONFIG_H__
+#ifndef __ONERT_BACKEND_BUILTIN_CONFIG_H__
+#define __ONERT_BACKEND_BUILTIN_CONFIG_H__
#include <backend/IConfig.h>
#include <memory>
@@ -25,7 +25,7 @@ namespace onert
{
namespace backend
{
-namespace controlflow
+namespace builtin
{
class Config : public IConfig
@@ -34,7 +34,7 @@ public:
static std::string ID;
std::string id() override { return ID; }
bool initialize() override;
- ir::Layout supportLayout(const ir::Operation &node, ir::Layout frontend_layout) override;
+ ir::Layout supportLayout(const ir::IOperation &node, ir::Layout frontend_layout) override;
bool supportPermutation() override { return false; }
bool supportDynamicTensor() override
{
@@ -46,8 +46,8 @@ public:
std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); }
};
-} // namespace controlflow
+} // namespace builtin
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CONTROLFLOW_CONFIG_H__
+#endif // __ONERT_BACKEND_BUILTIN_CONFIG_H__
diff --git a/runtime/onert/core/src/backend/controlflow/UserTensor.cc b/runtime/onert/core/src/backend/builtin/ConstantInitializer.h
index c8e2ebade..6b8eb3e9d 100644
--- a/runtime/onert/core/src/backend/controlflow/UserTensor.cc
+++ b/runtime/onert/core/src/backend/builtin/ConstantInitializer.h
@@ -14,27 +14,22 @@
* limitations under the License.
*/
-#include "UserTensor.h"
+#ifndef __ONERT_COMPILER_BUILTIN_CONSTANT_INITIALIZER_H__
+#define __ONERT_COMPILER_BUILTIN_CONSTANT_INITIALIZER_H__
+
+#include <backend/basic/ConstantInitializer.h>
namespace onert
{
namespace backend
{
-namespace controlflow
+namespace builtin
{
-size_t UserTensor::calcOffset(const ir::Coordinates &coords) const
-{
- size_t rank = num_dimensions();
- size_t offset = 0;
- for (size_t i = 0; i < rank; ++i)
- {
- offset = offset * dimension(i) + coords[i];
- }
- offset *= sizeOfDataType(data_type());
- return offset;
-}
+using ConstantInitializer = basic::ConstantInitializer;
-} // namespace controlflow
+} // namespace builtin
} // namespace backend
} // namespace onert
+
+#endif // __ONERT_COMPILER_BUILTIN_CONSTANT_INITIALIZER_H__
diff --git a/runtime/onert/core/src/backend/builtin/DynamicTensorManager.h b/runtime/onert/core/src/backend/builtin/DynamicTensorManager.h
new file mode 100644
index 000000000..148948a9c
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/DynamicTensorManager.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_DYNAMICTENSOR_MANAGER_H__
+#define __ONERT_BACKEND_BUILTIN_DYNAMICTENSOR_MANAGER_H__
+
+#include "TensorRegistry.h"
+#include "Tensor.h"
+
+#include <backend/basic/DynamicTensorManager.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+using DynamicTensorManager = basic::DynamicTensorManager;
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_DYNAMICTENSOR_MANAGER_H__
diff --git a/runtime/onert/core/src/backend/builtin/ExternalContext.h b/runtime/onert/core/src/backend/builtin/ExternalContext.h
new file mode 100644
index 000000000..390dbb579
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/ExternalContext.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_EXTERNAL_CONTEXT_H__
+#define __ONERT_BACKEND_BUILTIN_EXTERNAL_CONTEXT_H__
+
+#include <util/ConfigSource.h>
+
+#include <ruy/context.h>
+#include <ruy/context_get_ctx.h>
+#include <ruy/ctx.h>
+#include <ruy/tune.h>
+
+#include <memory>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+// TODO Unify this with cpu::ExternalContext
+class ExternalContext
+{
+private:
+ static const int kDefaultNumThreadpoolThreads = 1;
+
+public:
+ ExternalContext() : _ruy_context(std::make_unique<ruy::Context>())
+ {
+ setMaxNumThreads(onert::util::getConfigInt(onert::util::config::RUY_THREADS));
+ initPerThreadState();
+ }
+
+ void setMaxNumThreads(int max_num_threads)
+ {
+ const int target_num_threads =
+ max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads;
+ _ruy_context->set_max_num_threads(target_num_threads);
+ }
+
+ ruy::Context *ruy_context() const { return _ruy_context.get(); }
+
+private:
+ void initPerThreadState()
+ {
+ // Initialize per-thread state.
+ const int thread_count = _ruy_context->max_num_threads();
+ auto ctx = ruy::get_ctx(_ruy_context.get());
+ ctx->EnsureThreadSpecificResources(thread_count);
+ for (int i = 0; i < thread_count; i++)
+ {
+ ctx->GetThreadSpecificTuningResolver(i)->SetTuning(ctx->explicit_tuning());
+ }
+ }
+
+private:
+ const std::unique_ptr<ruy::Context> _ruy_context;
+};
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_EXTERNAL_CONTEXT_H__
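
The thread-count handling above is the only tunable part: the constructor reads the RUY_THREADS config value once, and any value of -1 or lower falls back to a single worker thread. A small sketch of that behavior, assuming the config source has been set up so the constructor can read RUY_THREADS:

// Sketch only: thread-count fallback of builtin::ExternalContext.
onert::backend::builtin::ExternalContext ctx;       // reads RUY_THREADS at construction
ctx.setMaxNumThreads(-1);                           // -1 (or lower) falls back to 1 worker thread
ctx.setMaxNumThreads(4);                            // caps the ruy thread pool at 4 threads
int threads = ctx.ruy_context()->max_num_threads(); // == 4 here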
diff --git a/runtime/onert/core/src/backend/builtin/IOTensor.cc b/runtime/onert/core/src/backend/builtin/IOTensor.cc
new file mode 100644
index 000000000..f7f4a6977
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/IOTensor.cc
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IOTensor.h"
+
+#include <assert.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+// `dynamic_cast` does not work across library boundaries on NDK
+// With this out-of-line destructor as a key function, `dynamic_cast` works across shared libraries
+IOTensor::~IOTensor() {}
+
+IOTensor::IOTensor(const ir::OperandInfo &info, ir::Layout layout)
+ : IPortableTensor{info}, _orig_info{info}, _orig_layout{layout}
+{
+ setUserTensor(nullptr, 0);
+}
+
+void IOTensor::setTensor(IPortableTensor *tensor)
+{
+ assert(tensor);
+ assert(tensor != this);
+ // TODO Handle when layout was changed
+ assert(tensor->layout() == _orig_layout); // Changing layout is not considered yet
+ _user_tensor.reset();
+ _tensor = tensor;
+}
+
+void IOTensor::setUserTensor(uint8_t *buffer, size_t size)
+{
+ _user_tensor = std::make_unique<UserTensor>(_orig_info, _orig_layout, buffer, size);
+ _tensor = _user_tensor.get();
+}
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/core/src/backend/builtin/IOTensor.h b/runtime/onert/core/src/backend/builtin/IOTensor.h
new file mode 100644
index 000000000..d94ed0bca
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/IOTensor.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_IO_TENSOR_H__
+#define __ONERT_BACKEND_BUILTIN_IO_TENSOR_H__
+
+#include "backend/IPortableTensor.h"
+#include "UserTensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+/**
+ * @brief Tensor object that forwards every call to the tensor it points to.
+ *
+ * A model I/O tensor can be one of two types:
+ *
+ * 1. @c UserTensor, if the tensor belongs to the primary graph
+ * 2. Any other derivative of @c IPortableTensor owned by another backend, otherwise
+ *
+ * To support both, this object forwards everything to the actual tensor pointer.
+ * Exceptionally, if it is a @c UserTensor, this class creates and manages it.
+ */
+class IOTensor : public IPortableTensor
+{
+public:
+ IOTensor(const ir::OperandInfo &info, ir::Layout layout);
+ ~IOTensor();
+
+public:
+ void setTensor(IPortableTensor *tensor);
+ void setUserTensor(uint8_t *buffer, size_t size);
+ const ir::OperandInfo &orig_info() const { return _orig_info; }
+ ir::Layout orig_layout() const { return _orig_layout; }
+
+public:
+ uint8_t *buffer() const override { return _tensor->buffer(); }
+ size_t total_size() const override { return _tensor->total_size(); }
+ size_t calcOffset(const ir::Coordinates &coords) const override
+ {
+ return _tensor->calcOffset(coords);
+ }
+ ir::Layout layout() const override { return _tensor->layout(); }
+ ir::DataType data_type() const override { return _tensor->data_type(); }
+ bool is_dynamic() const override
+ {
+ return _is_dynamic || _orig_info.isDynamic() || (_tensor && _tensor->is_dynamic());
+ }
+ void set_dynamic() override { _is_dynamic = true; }
+ ir::Shape getShape() const override { return _tensor->getShape(); }
+ void setShape(const ir::Shape &shape) override
+ {
+    // Workaround: IPortableTensor holds _info as its own member, so keep it in sync
+ _info.shape(shape);
+ _tensor->setShape(shape);
+ }
+ bool is_constant() const override { return _tensor->is_constant(); }
+ bool applyShape(const ir::Shape &shape) override
+ {
+    // Workaround: IPortableTensor holds _info as its own member, so keep it in sync
+ _info.shape(shape);
+ return _tensor->applyShape(shape);
+ }
+
+public:
+ void setShapeOfIPortableTensor(const ir::Shape &shape) { _info.shape(shape); }
+
+private:
+ const ir::OperandInfo _orig_info;
+ const ir::Layout _orig_layout;
+ bool _is_dynamic{false};
+ IPortableTensor *_tensor{nullptr}; //< The actual tensor that is indirected
+ std::unique_ptr<UserTensor> _user_tensor; //< If it is a user tensor, it is managed by this object
+};
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_IO_TENSOR_H__
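
The indirection described in the class comment has exactly two binding modes, shown in the sketch below; `info`, `user_buffer`, `user_buffer_size`, and `backend_tensor` are assumed placeholders rather than names from this patch:

// Sketch only: the two ways an IOTensor gets bound.
onert::backend::builtin::IOTensor io{info, onert::ir::Layout::NHWC};

// 1. Primary-graph I/O: wrap the user-provided buffer in an internally managed UserTensor
io.setUserTensor(user_buffer, user_buffer_size);

// 2. Otherwise: forward every call to a tensor owned by another backend
io.setTensor(backend_tensor); // an IPortableTensor* with the same layout as the original info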
diff --git a/runtime/onert/core/src/backend/builtin/KernelGenerator.cc b/runtime/onert/core/src/backend/builtin/KernelGenerator.cc
new file mode 100644
index 000000000..00c200a92
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/KernelGenerator.cc
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "KernelGenerator.h"
+
+#include "kernel/IfLayer.h"
+#include "kernel/PermuteLayer.h"
+#include "kernel/WhileLayer.h"
+
+#include "exec/FunctionSequence.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+KernelGenerator::KernelGenerator(const ir::Graph &graph, DynamicTensorManager *dyn_tensor_manager,
+ const std::shared_ptr<TensorRegistry> &tensor_reg,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : basic::KernelGeneratorBase{graph}, _dyn_tensor_manager{dyn_tensor_manager},
+ _tensor_reg{tensor_reg}, _tensor_registries{}, _executors{nullptr}, _model_index{},
+ _external_context{external_context}
+{
+ UNUSED_RELEASE(_graph);
+ UNUSED_RELEASE(_tensor_registries);
+ UNUSED_RELEASE(_executors);
+}
+
+std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
+{
+ assert(_dyn_tensor_manager);
+ assert(_tensor_reg);
+
+ auto ret = std::make_unique<exec::FunctionSequence>();
+
+ // Prepare to handle dynamic tensors later
+ auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
+ {
+ dyn_ctx->op = &_graph.operations().at(ind);
+ dyn_ctx->dynamic_shape_inferer =
+ std::make_unique<exec::DynamicShapeInferer>(_graph.operands(), _tensor_reg);
+ }
+ ret->dynamic_tensor_ctx(dyn_ctx);
+
+ auto &op = _graph.operations().at(ind);
+ op.accept(*this);
+ assert(_return_fn); // _return_fn must have been generated
+ ret->append(std::move(_return_fn));
+
+ return ret;
+}
+
+void KernelGenerator::visit(const ir::operation::If &node)
+{
+ const auto then_subg_index = node.param().then_subg_index;
+ const auto else_subg_index = node.param().else_subg_index;
+
+ std::vector<backend::IPortableTensor *> input_tensors;
+ for (const auto &input_index : node.getInputs())
+ {
+ auto input_tensor = getPortableTensor(input_index);
+ input_tensors.emplace_back(input_tensor);
+ }
+
+ std::vector<backend::IPortableTensor *> output_tensors;
+ for (const auto &output_index : node.getOutputs())
+ {
+ auto output_tensor = getPortableTensor(output_index);
+ output_tensors.emplace_back(output_tensor);
+ }
+
+  // IfLayer just sets Executors instead of the then and else executors to avoid the complexity of
+  // creating executors recursively
+ const auto cond_tensor = input_tensors.front();
+ input_tensors.erase(input_tensors.begin());
+ auto fn = std::make_unique<::onert::backend::builtin::kernel::IfLayer>(
+ cond_tensor, input_tensors, output_tensors, then_subg_index, else_subg_index, _executors,
+ _model_index, _external_context);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Permute &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(0)};
+
+ // Add PermuteLayer
+ std::vector<ITensor *> output_tensors{getTensor(output_index)};
+ std::vector<ITensor *> input_tensors{getTensor(input_index)};
+
+ auto fn =
+ std::make_unique<kernel::PermuteLayer>(input_tensors, output_tensors, _external_context);
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::While &node)
+{
+ const auto cond_subg_index = node.param().cond_subg_index;
+ const auto body_subg_index = node.param().body_subg_index;
+
+  // This op does not support constant inputs, because the builtin backend does not have a
+  // TensorBuilder
+ std::vector<backend::IPortableTensor *> input_tensors;
+ for (const auto &input_index : node.getInputs())
+ {
+ auto input_tensor = getPortableTensor(input_index);
+ input_tensors.emplace_back(input_tensor);
+ }
+
+ std::vector<backend::IPortableTensor *> output_tensors;
+ for (const auto &output_index : node.getOutputs())
+ {
+ auto output_tensor = getPortableTensor(output_index);
+ output_tensors.emplace_back(output_tensor);
+ }
+
+  // WhileLayer just sets Executors instead of the cond and body executors to avoid the complexity
+  // of creating executors recursively
+ auto fn = std::make_unique<::onert::backend::builtin::kernel::WhileLayer>(
+ input_tensors, output_tensors, cond_subg_index, body_subg_index, _executors, _model_index,
+ _dyn_tensor_manager->dynamic_mem_mgr().get(), _external_context);
+
+ _return_fn = std::move(fn);
+}
+
+backend::ITensor *KernelGenerator::getTensor(const ir::OperandIndex &index)
+{
+ // get Tensor from all tensor registries (for Permute op)
+ auto ret = _tensor_registries.getITensor(index);
+ assert(ret != nullptr);
+ return ret;
+}
+
+backend::IPortableTensor *KernelGenerator::getPortableTensor(const ir::OperandIndex &index)
+{
+ auto ret = _tensor_reg->getPortableTensor(index);
+ assert(ret != nullptr);
+ return ret;
+}
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
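
The generator above is stateful: the surrounding backend context is expected to hand it the cross-backend tensor registries, the executor set, and the model index before asking for kernels. A hedged sketch of that driving sequence, where every lowercase variable is assumed to exist already and is not defined in this patch:

// Sketch only: expected call order for builtin::KernelGenerator.
auto kgen = std::make_shared<onert::backend::builtin::KernelGenerator>(graph, dyn_mgr, tensor_reg,
                                                                       ext_ctx);
kgen->setTensorRegistries(tensor_registries); // lets Permute reach tensors of every backend
kgen->setExecutors(executors);                // If/While dispatch into other subgraphs through these
kgen->setModelIndex(model_index);
auto fn_seq = kgen->generate(op_index);       // FunctionSequence with a dynamic-shape context attached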
diff --git a/runtime/onert/core/src/backend/controlflow/KernelGenerator.h b/runtime/onert/core/src/backend/builtin/KernelGenerator.h
index b84a810e4..3c86fe306 100644
--- a/runtime/onert/core/src/backend/controlflow/KernelGenerator.h
+++ b/runtime/onert/core/src/backend/builtin/KernelGenerator.h
@@ -14,60 +14,66 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_GENERATOR_H__
-#define __ONERT_BACKEND_CONTROLFLOW_KERNEL_GENERATOR_H__
+#ifndef __ONERT_BACKEND_BUILTIN_KERNEL_GENERATOR_H__
+#define __ONERT_BACKEND_BUILTIN_KERNEL_GENERATOR_H__
-#include <backend/IKernelGenerator.h>
-#include <backend/ITensorBuilder.h>
-#include <exec/IExecutor.h>
-#include <ir/Graph.h>
-#include "TensorBuilder.h"
-#include "compiler/TensorRegistries.h"
+#include "DynamicTensorManager.h"
+#include "ExternalContext.h"
#include "TensorRegistry.h"
+#include "../../compiler/TensorRegistries.h"
+
+#include "backend/basic/KernelGeneratorBase.h"
+#include "exec/IExecutors.h"
+#include "ir/Graph.h"
namespace onert
{
namespace backend
{
-namespace controlflow
+namespace builtin
{
-class KernelGenerator : public IKernelGenerator
+class KernelGenerator : public basic::KernelGeneratorBase
{
public:
- KernelGenerator(const ir::Graph &graph, IDynamicTensorManager *dyn_tensor_manager,
- const std::shared_ptr<TensorRegistry> &tensor_reg);
+ KernelGenerator(const ir::Graph &graph, DynamicTensorManager *dyn_tensor_manager,
+ const std::shared_ptr<TensorRegistry> &tensor_reg,
+ const std::shared_ptr<ExternalContext> &external_context);
void setTensorRegistries(const compiler::TensorRegistries &tensor_registries)
{
_tensor_registries = tensor_registries;
}
- void setExecutorMap(const std::shared_ptr<exec::ExecutorMap> &executor_map)
+ void setExecutors(const std::shared_ptr<exec::IExecutors> &executors)
{
// FIXME Using shared_ptr's raw pointer!
- _executor_map = executor_map.get();
+ _executors = executors.get();
}
- using IKernelGenerator::visit;
+ void setModelIndex(const ir::ModelIndex &index) { _model_index = index; }
+
+ std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) override;
- void visit(const ir::OpSequence &) override;
+private:
void visit(const ir::operation::If &) override;
void visit(const ir::operation::Permute &) override;
void visit(const ir::operation::While &) override;
private:
- std::shared_ptr<backend::ITensor> getTensor(const ir::OperandIndex &index);
+ backend::ITensor *getTensor(const ir::OperandIndex &index);
+ backend::IPortableTensor *getPortableTensor(const ir::OperandIndex &index);
private:
- const ir::Graph &_graph;
- IDynamicTensorManager *_dyn_tensor_manager;
+ DynamicTensorManager *_dyn_tensor_manager;
std::shared_ptr<TensorRegistry> _tensor_reg;
compiler::TensorRegistries _tensor_registries;
- exec::ExecutorMap *_executor_map;
+ exec::IExecutors *_executors;
+ ir::ModelIndex _model_index;
+ const std::shared_ptr<ExternalContext> _external_context;
};
-} // namespace controlflow
+} // namespace builtin
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CONTROLFLOW_KERNEL_GENERATOR_H__
+#endif // __ONERT_BACKEND_BUILTIN_KERNEL_GENERATOR_H__
diff --git a/runtime/onert/core/src/backend/controlflow/UserTensorRegistry.h b/runtime/onert/core/src/backend/builtin/Tensor.h
index fa2a2d54c..d55e64161 100644
--- a/runtime/onert/core/src/backend/controlflow/UserTensorRegistry.h
+++ b/runtime/onert/core/src/backend/builtin/Tensor.h
@@ -14,23 +14,23 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CONTROLFLOW_USER_TENSOR_REGISTRY__
-#define __ONERT_BACKEND_CONTROLFLOW_USER_TENSOR_REGISTRY__
+#ifndef __ONERT_BACKEND_BUILTIN_TENSOR_H__
+#define __ONERT_BACKEND_BUILTIN_TENSOR_H__
-#include "backend/ITensorRegistry.h"
-#include "UserTensor.h"
+#include <backend/basic/Tensor.h>
namespace onert
{
namespace backend
{
-namespace controlflow
+namespace builtin
{
-using UserTensorRegistry = PortableTensorRegistryTemplate<UserTensor>;
+using Tensor = basic::Tensor;
+using ExternalTensor = basic::ExternalTensor;
-} // namespace controlflow
+} // namespace builtin
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CONTROLFLOW_USER_TENSOR_REGISTRY__
+#endif // __ONERT_BACKEND_BUILTIN_TENSOR_H__
diff --git a/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc b/runtime/onert/core/src/backend/builtin/TensorBuilder.cc
index e5c3f5fd5..a2f7af3ea 100644
--- a/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc
+++ b/runtime/onert/core/src/backend/builtin/TensorBuilder.cc
@@ -24,13 +24,13 @@ namespace onert
{
namespace backend
{
-namespace controlflow
+namespace builtin
{
TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg)
- : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)},
- _static_tensor_mgr{
- new cpu_common::StaticTensorManager(_tensor_reg->base_reg(), _dynamic_tensor_mgr.get())}
+ : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg->base_reg())},
+ _static_tensor_mgr{
+ new basic::StaticTensorManager(_tensor_reg->base_reg(), _dynamic_tensor_mgr.get())}
{
/* empty */
}
@@ -40,15 +40,14 @@ void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::Op
{
_tensor_info_map.emplace(ind, info);
- _tensor_layout_map.insert({ind, backend_layout});
-
+ VERBOSE_F() << "cpucommon REGISTER!! " << ind << std::endl;
if (info.isDynamic())
{
- _dynamic_tensor_mgr->buildTensor(ind, info, _tensor_layout_map[ind]);
+ _dynamic_tensor_mgr->buildTensor(ind, info, backend_layout);
}
else
{
- _static_tensor_mgr->buildTensor(ind, info, _tensor_layout_map[ind], info.isConstant());
+ _static_tensor_mgr->buildTensor(ind, info, backend_layout, info.isConstant());
}
}
@@ -58,7 +57,7 @@ void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
if (_tensor_info_map.find(ind) == _tensor_info_map.end()) // Do not proceed for user tensors
return;
- const auto tensor_info = _tensor_info_map.at(ind);
+ const auto &tensor_info = _tensor_info_map.at(ind);
if (!nativeOwnTensorAt(ind)->is_dynamic())
{
@@ -89,39 +88,18 @@ bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
return _tensor_info_map.find(ind) != _tensor_info_map.end();
}
-void TensorBuilder::prepare(void)
-{
- _static_tensor_mgr->allocateConsts();
- _static_tensor_mgr->allocateNonconsts();
-}
+void TensorBuilder::allocate(void) { _static_tensor_mgr->allocateNonconsts(); }
-void TensorBuilder::allocate()
+DynamicTensorManager *TensorBuilder::dynamicTensorManager(void)
{
- // NOTE For now nothing to do. Allocation is done in prepare stage, which is not appropriate
- // This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation.
+ return _dynamic_tensor_mgr.get();
}
-std::shared_ptr<cpu_common::Tensor> TensorBuilder::nativeOwnTensorAt(const ir::OperandIndex &ind)
+basic::Tensor *TensorBuilder::nativeOwnTensorAt(const ir::OperandIndex &ind)
{
return _tensor_reg->getNativeOwnTensor(ind);
}
-std::unique_ptr<ITensorManager> TensorBuilder::releaseStaticTensorManager(void)
-{
- return std::move(_static_tensor_mgr);
-}
-
-std::unique_ptr<ITensorManager> TensorBuilder::releaseDynamicTensorManager(void)
-{
- return std::move(_dynamic_tensor_mgr);
-}
-
-void TensorBuilder::setNativeUserTensor(const ir::OperandIndex &ind,
- const std::shared_ptr<UserTensor> &tensor)
-{
- _tensor_reg->setNativeUserTensor(ind, tensor);
-}
-
-} // namespace controlflow
+} // namespace builtin
} // namespace backend
} // namespace onert
diff --git a/runtime/onert/core/src/backend/controlflow/TensorBuilder.h b/runtime/onert/core/src/backend/builtin/TensorBuilder.h
index 2f2a2c47e..1e364c927 100644
--- a/runtime/onert/core/src/backend/controlflow/TensorBuilder.h
+++ b/runtime/onert/core/src/backend/builtin/TensorBuilder.h
@@ -14,29 +14,27 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_BUILDER_H__
-#define __ONERT_BACKEND_CONTROLFLOW_TENSOR_BUILDER_H__
+#ifndef __ONERT_BACKEND_BUILTIN_TENSOR_BUILDER_H__
+#define __ONERT_BACKEND_BUILTIN_TENSOR_BUILDER_H__
-#include <backend/cpu_common/StaticTensorManager.h>
-#include <backend/cpu_common/TensorRegistry.h>
-#include <backend/cpu_common/Tensor.h>
+#include <backend/basic/StaticTensorManager.h>
+#include <backend/basic/TensorRegistry.h>
+#include <backend/basic/Tensor.h>
-#include <backend/ITensorBuilder.h>
#include <ir/OperandIndexMap.h>
#include <unordered_map>
#include "DynamicTensorManager.h"
-#include "UserTensorRegistry.h"
namespace onert
{
namespace backend
{
-namespace controlflow
+namespace builtin
{
-class TensorBuilder : public ITensorBuilder
+class TensorBuilder
{
public:
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg);
@@ -48,42 +46,34 @@ public:
* @param[in] layout Operand data layout
*/
void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- ir::Layout backend_layout) override;
+ ir::Layout backend_layout);
- void notifyFirstUse(const ir::OperandIndex &) override;
- void notifyLastUse(const ir::OperandIndex &) override;
+ void notifyFirstUse(const ir::OperandIndex &);
+ void notifyLastUse(const ir::OperandIndex &);
- bool isRegistered(const ir::OperandIndex &) const override;
+ bool isRegistered(const ir::OperandIndex &) const;
- void prepare(void) override;
- void allocate() override;
- void postFunctionPrepare() override { /* DO NOTHING */}
+ void allocate(void);
- std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) override;
-
- IDynamicTensorManager *dynamicTensorManager(void) override { return _dynamic_tensor_mgr.get(); }
-
- std::unique_ptr<ITensorManager> releaseDynamicTensorManager(void) override;
+ DynamicTensorManager *dynamicTensorManager(void);
/**
* @brief Get tensor with a specific OperandIndex.
* @param ind OperandIndex for the tensor. There must exist a tensor with this ind.
* If not, program will crash with assert or exception.
- * @return shared_ptr<operand::Tensor>
+ * @return operand::Tensor *
*/
- std::shared_ptr<cpu_common::Tensor> nativeOwnTensorAt(const ir::OperandIndex &ind);
- void setNativeUserTensor(const ir::OperandIndex &ind, const std::shared_ptr<UserTensor> &tensor);
+ basic::Tensor *nativeOwnTensorAt(const ir::OperandIndex &ind);
private:
const std::shared_ptr<TensorRegistry> _tensor_reg;
std::unique_ptr<DynamicTensorManager> _dynamic_tensor_mgr;
- std::unique_ptr<cpu_common::StaticTensorManager> _static_tensor_mgr;
+ std::unique_ptr<basic::StaticTensorManager> _static_tensor_mgr;
ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
- ir::OperandIndexMap<ir::Layout> _tensor_layout_map;
};
-} // namespace controlflow
+} // namespace builtin
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CONTROLFLOW_TENSOR_BUILDER_H__
+#endif // __ONERT_BACKEND_BUILTIN_TENSOR_BUILDER_H__
diff --git a/runtime/onert/core/src/backend/builtin/TensorRegistry.h b/runtime/onert/core/src/backend/builtin/TensorRegistry.h
new file mode 100644
index 000000000..ae68b1318
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/TensorRegistry.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_TENSOR_REGISTRY_H__
+#define __ONERT_BACKEND_BUILTIN_TENSOR_REGISTRY_H__
+
+#include "backend/basic/TensorRegistry.h"
+#include "backend/ITensorRegistry.h"
+#include "Tensor.h"
+#include "IOTensor.h"
+#include <assert.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+/**
+ * @brief Tensor registry class for builtin backend
+ *
+ * This class contains three types of tensors: two kinds of native tensors (tensors managed by
+ * this backend) and migrant tensors (tensors owned by other backends).
+ *
+ * - NativeIOTensor - @c IOTensor managed by this backend ( kept in @c _native_io_tensors )
+ *   - NOTE The tensor it actually points to can be from another backend
+ * - NativeOwnTensor - @c basic::Tensor managed by this backend ( in @c _base_reg )
+ * - MigrantTensor - @c IPortableTensor managed by other backends
+ *
+ * @note @c _base_reg is used in implementation to reuse @c basic::StaticTensorManager
+ *
+ */
+class TensorRegistry : public ITensorRegistry
+{
+public:
+ TensorRegistry() : _base_reg{new basic::TensorRegistry} {}
+
+ ITensor *getITensor(const ir::OperandIndex &ind) override
+ {
+ auto base_tensor = _base_reg->getITensor(ind);
+ if (base_tensor)
+ return base_tensor;
+ return getNativeIOTensor(ind);
+ }
+
+ ITensor *getNativeITensor(const ir::OperandIndex &ind) override
+ {
+ auto base_tensor = _base_reg->getNativeITensor(ind);
+ if (base_tensor)
+ return base_tensor;
+ return getNativeIOTensor(ind);
+ }
+
+ IPortableTensor *getPortableTensor(const ir::OperandIndex &ind)
+ {
+ auto base_tensor = _base_reg->getPortableTensor(ind);
+ if (base_tensor)
+ return base_tensor;
+ return getNativeIOTensor(ind);
+ }
+
+ IPortableTensor *getNativeTensor(const ir::OperandIndex &ind)
+ {
+ auto base_tensor = _base_reg->getNativeTensor(ind);
+ if (base_tensor)
+ return base_tensor;
+ return getNativeIOTensor(ind);
+ }
+
+ Tensor *getNativeOwnTensor(const ir::OperandIndex &ind)
+ {
+ return _base_reg->getNativeTensor(ind);
+ }
+
+ IOTensor *getNativeIOTensor(const ir::OperandIndex &ind)
+ {
+ auto tensor = _native_io_tensors.find(ind);
+ if (tensor != _native_io_tensors.end())
+ return tensor->second.get();
+ return nullptr;
+ }
+
+ bool setMigrantTensor(const ir::OperandIndex &ind, IPortableTensor *tensor) override
+ {
+ assert(tensor);
+ assert(!getITensor(ind)); // For the ind, tensor is not registered yet
+ _base_reg->setMigrantTensor(ind, tensor);
+ return true;
+ }
+
+ void setNativeOwnTensor(ir::OperandIndex ind, std::unique_ptr<Tensor> &&tensor)
+ {
+ assert(tensor);
+ assert(!getITensor(ind)); // For the ind, tensor is not registered yet
+ _base_reg->setNativeTensor(ind, std::move(tensor));
+ }
+
+ void setNativeIOTensor(ir::OperandIndex ind, std::unique_ptr<IOTensor> &&tensor)
+ {
+ assert(tensor);
+ assert(!getITensor(ind)); // For the ind, tensor is not registered yet
+ _native_io_tensors[ind] = std::move(tensor);
+ }
+
+ const ir::OperandIndexMap<std::unique_ptr<IOTensor>> &native_io_tensors()
+ {
+ return _native_io_tensors;
+ }
+ std::shared_ptr<basic::TensorRegistry> base_reg() { return _base_reg; }
+
+private:
+ std::shared_ptr<basic::TensorRegistry> _base_reg;
+ ir::OperandIndexMap<std::unique_ptr<IOTensor>> _native_io_tensors;
+};
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // ifndef __ONERT_BACKEND_BUILTIN_TENSOR_REGISTRY_H__
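
All getters above share the same lookup order: the shared basic registry (own and migrant tensors) is consulted first and the IOTensor map is the fallback. A short sketch, assuming the operand indices and tensor objects (`own_ind`, `own_tensor`, and so on) already exist:

// Sketch only: registration targets and lookup order of builtin::TensorRegistry.
onert::backend::builtin::TensorRegistry reg;
reg.setNativeOwnTensor(own_ind, std::move(own_tensor));   // stored in the shared base registry
reg.setNativeIOTensor(io_ind, std::move(io_tensor));      // stored in _native_io_tensors
reg.setMigrantTensor(migrant_ind, other_backend_tensor);  // also goes to the base registry

auto *t = reg.getITensor(io_ind); // misses in the base registry, found as a native IOTensor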
diff --git a/runtime/onert/core/src/backend/builtin/UserTensor.cc b/runtime/onert/core/src/backend/builtin/UserTensor.cc
new file mode 100644
index 000000000..f0b00b928
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/UserTensor.cc
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "UserTensor.h"
+
+#include "util/Exceptions.h"
+#include "ir/DataType.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+
+size_t UserTensor::calcOffset(const ir::Coordinates &coords) const
+{
+ size_t rank = getShape().rank();
+ size_t offset = 0;
+ for (size_t i = 0; i < rank; ++i)
+ {
+ offset = offset * getShape().dim(i) + coords[i];
+ }
+ offset *= sizeOfDataType(data_type());
+ return offset;
+}
+
+bool UserTensor::applyShape(const ir::Shape &new_shape)
+{
+ // User tensors cannot be reallocated.
+ auto new_size = new_shape.num_elements() * ir::sizeOfDataType(data_type());
+ if (total_size() < new_size)
+ throw InsufficientBufferSizeException{"User given buffer size is too small."};
+ setShape(new_shape);
+ return true;
+}
+
+} // namespace builtin
+} // namespace backend
+} // namespace onert
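
calcOffset() is the usual row-major byte offset, and applyShape() only updates metadata because the user owns the buffer. A small sketch with illustrative numbers; the 2x3 float32 OperandInfo `info_2x3_f32` is an assumed placeholder:

// Row-major offset example for shape {2, 3}, float32:
//   coords {1, 2} -> ((0 * 2 + 1) * 3 + 2) * sizeof(float) = 5 * 4 = 20 bytes
uint8_t buffer[24]; // room for 6 float32 elements
onert::backend::builtin::UserTensor t{info_2x3_f32, onert::ir::Layout::NHWC, buffer, sizeof(buffer)};

t.applyShape(onert::ir::Shape{3, 2}); // OK: still 6 elements, only shape metadata changes
t.applyShape(onert::ir::Shape{4, 3}); // throws InsufficientBufferSizeException (needs 48 bytes)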
diff --git a/runtime/onert/core/src/backend/controlflow/UserTensor.h b/runtime/onert/core/src/backend/builtin/UserTensor.h
index 9be33595d..0d0ed73c5 100644
--- a/runtime/onert/core/src/backend/controlflow/UserTensor.h
+++ b/runtime/onert/core/src/backend/builtin/UserTensor.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CONTROLFLOW_USER_TENSOR_H__
-#define __ONERT_BACKEND_CONTROLFLOW_USER_TENSOR_H__
+#ifndef __ONERT_BACKEND_BUILTIN_USER_TENSOR_H__
+#define __ONERT_BACKEND_BUILTIN_USER_TENSOR_H__
#include "ir/OperandInfo.h"
#include "backend/IPortableTensor.h"
@@ -24,7 +24,7 @@ namespace onert
{
namespace backend
{
-namespace controlflow
+namespace builtin
{
/**
@@ -38,16 +38,12 @@ namespace controlflow
class UserTensor : public IPortableTensor
{
public:
- UserTensor(const ir::OperandInfo &info, ir::Layout layout, uint8_t *buffer, size_t size,
- IDynamicTensorManager *dynamic_tensor_manager)
- : _info{info}, _layout{layout}, _buffer{buffer}, _size{size}, _dynamic{false},
- _dynamic_tensor_manager{dynamic_tensor_manager}
+ UserTensor(const ir::OperandInfo &info, ir::Layout layout, uint8_t *buffer, size_t size)
+ : IPortableTensor{info}, _layout{layout}, _buffer{buffer}, _size{size}, _dynamic{false}
{
}
- UserTensor(const ir::OperandInfo &info, ir::Layout layout,
- IDynamicTensorManager *dynamic_tensor_manager)
- : UserTensor{info, layout, nullptr, 0, dynamic_tensor_manager}
+ UserTensor(const ir::OperandInfo &info, ir::Layout layout) : UserTensor{info, layout, nullptr, 0}
{
}
@@ -61,31 +57,25 @@ public:
public:
uint8_t *buffer() const override { return _buffer; }
size_t total_size() const override { return _size; }
- size_t dimension(size_t index) const override { return _info.shape().dim(index); }
- size_t num_dimensions() const override { return _info.shape().rank(); }
size_t calcOffset(const ir::Coordinates &coords) const override;
ir::Layout layout() const override { return _layout; }
ir::DataType data_type() const override { return _info.typeInfo().type(); }
- float data_scale() const override { return _info.typeInfo().scale(); }
- int32_t data_offset() const override { return _info.typeInfo().offset(); }
bool is_dynamic() const override { return _dynamic; }
void set_dynamic() override { _dynamic = true; }
ir::Shape getShape() const override { return _info.shape(); }
void setShape(const ir::Shape &new_shape) override { _info.shape(new_shape); }
bool is_constant() const override { return false; }
- IDynamicTensorManager *dynamic_tensor_manager() override { return _dynamic_tensor_manager; }
+ bool applyShape(const ir::Shape &) override;
private:
- ir::OperandInfo _info;
ir::Layout _layout;
uint8_t *_buffer;
size_t _size;
bool _dynamic;
- IDynamicTensorManager *_dynamic_tensor_manager;
};
-} // namespace controlflow
+} // namespace builtin
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CONTROLFLOW_USER_TENSOR_H__
+#endif // __ONERT_BACKEND_BUILTIN_USER_TENSOR_H__
diff --git a/runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc
new file mode 100644
index 000000000..51bc5a8f2
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IfLayer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace kernel
+{
+
+IfLayer::IfLayer(backend::IPortableTensor *cond_tensor,
+ const std::vector<backend::IPortableTensor *> input_tensors,
+ const std::vector<backend::IPortableTensor *> output_tensors,
+ const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index,
+ exec::IExecutors *executors, const ir::ModelIndex &model_index,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : _cond_tensor{cond_tensor}, _input_tensors{input_tensors}, _output_tensors{output_tensors},
+ _then_subg_index{then_subg_index}, _else_subg_index{else_subg_index}, _executors{executors},
+ _model_index{model_index}, _external_context{external_context}
+{
+  // At this point, `_executors` may not yet contain the executors of the then and else subgraphs
+}
+
+void IfLayer::run()
+{
+ // Check condition
+ // // If true
+ // // // Set _input_tensors -> then-subg's inputs
+ // // // Set outputs of then-subg -> _output_tensors
+ // // // Run then-subg
+ // // Else
+ // // // Set _input_tensors -> else-subg's inputs
+ // // // Set outputs of else-subg -> _output_tensors
+ // // // Run else-subg
+
+ auto getResultCond = [](backend::IPortableTensor *tensor) -> bool {
+ bool ret = false;
+ tensor->access([&](ITensor &tensor) { ret = *reinterpret_cast<bool *>(tensor.buffer()); });
+ return ret;
+ };
+
+ exec::IExecutor *subg_exec = nullptr;
+ bool cond_result = getResultCond(_cond_tensor);
+ if (cond_result)
+ {
+ VERBOSE(If) << "Call to $" << _then_subg_index << " (then)" << std::endl;
+ subg_exec = _executors->at(_model_index, _then_subg_index);
+ }
+ else
+ {
+ VERBOSE(If) << "Call to $" << _else_subg_index << " (else)" << std::endl;
+ subg_exec = _executors->at(_model_index, _else_subg_index);
+ }
+
+ subg_exec->execute(_input_tensors, _output_tensors);
+ VERBOSE(If) << "Return from $" << (cond_result ? _then_subg_index : _else_subg_index)
+ << std::endl;
+}
+
+} // namespace kernel
+} // namespace builtin
+} // namespace backend
+} // namespace onert
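
The dispatch rule in run() boils down to reading a single bool from the condition tensor and invoking the matching subgraph executor with the remaining inputs. A condensed sketch of that rule (variable names are placeholders; the access() wrapper and logging are omitted):

// Sketch only: the core of IfLayer::run().
bool cond = *reinterpret_cast<bool *>(cond_tensor->buffer());
auto *subg_exec = cond ? executors->at(model_index, then_subg_index)
                       : executors->at(model_index, else_subg_index);
subg_exec->execute(input_tensors, output_tensors); // then/else must share the same I/O signature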
diff --git a/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.h b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.h
index ef3a6e6f6..8f639ced9 100644
--- a/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.h
+++ b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.h
@@ -14,17 +14,18 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_IF_LAYER_H__
-#define __ONERT_BACKEND_CONTROLFLOW_KERNEL_IF_LAYER_H__
+#ifndef __ONERT_BACKEND_BUILTIN_KERNEL_IF_LAYER_H__
+#define __ONERT_BACKEND_BUILTIN_KERNEL_IF_LAYER_H__
-#include <backend/ITensor.h>
-#include <exec/IExecutor.h>
+#include <backend/IPortableTensor.h>
+#include <exec/IExecutors.h>
+#include "../ExternalContext.h"
namespace onert
{
namespace backend
{
-namespace controlflow
+namespace builtin
{
namespace kernel
{
@@ -32,32 +33,30 @@ namespace kernel
class IfLayer : public ::onert::exec::IFunction
{
public:
- IfLayer(const std::shared_ptr<backend::ITensor> &cond_tensor,
- const std::vector<std::shared_ptr<backend::ITensor>> input_tensors,
- const std::vector<std::shared_ptr<backend::ITensor>> output_tensors,
- const ir::OperandIndexSequence &output_indices, const ir::Graph &graph,
- const exec::DynAllocInfoMap &outputs_dyn_alloc_info,
+ IfLayer(backend::IPortableTensor *cond_tensor,
+ const std::vector<backend::IPortableTensor *> input_tensors,
+ const std::vector<backend::IPortableTensor *> output_tensors,
const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index,
- exec::ExecutorMap *executor_map);
+ exec::IExecutors *executors, const ir::ModelIndex &model_index,
+ const std::shared_ptr<ExternalContext> &external_context);
public:
void run() override;
private:
- const std::shared_ptr<backend::ITensor> _cond_tensor;
- const std::vector<std::shared_ptr<backend::ITensor>> _input_tensors;
- const std::vector<std::shared_ptr<backend::ITensor>> _output_tensors;
- const ir::OperandIndexSequence &_output_indices;
- const ir::Graph &_graph;
- const exec::DynAllocInfoMap _outputs_dyn_alloc_info;
+ backend::IPortableTensor *_cond_tensor;
+ const std::vector<backend::IPortableTensor *> _input_tensors;
+ const std::vector<backend::IPortableTensor *> _output_tensors;
const ir::SubgraphIndex _then_subg_index;
const ir::SubgraphIndex _else_subg_index;
- exec::ExecutorMap *_executor_map;
+ exec::IExecutors *_executors;
+ ir::ModelIndex _model_index;
+ const std::shared_ptr<ExternalContext> _external_context;
};
} // namespace kernel
-} // namespace controlflow
+} // namespace builtin
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CONTROLFLOW_KERNEL_IF_LAYER_H__
+#endif // __ONERT_BACKEND_BUILTIN_KERNEL_IF_LAYER_H__
diff --git a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc
new file mode 100644
index 000000000..600180077
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc
@@ -0,0 +1,316 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PermuteLayer.h"
+
+#include "../../../exec/ShapeConverter.h"
+
+#include <ruy/context.h> // from @ruy
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace kernel
+{
+
+PermuteLayer::PermuteLayer(const std::vector<ITensor *> &src_tensors,
+ const std::vector<ITensor *> &dst_tensors,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : _external_context{external_context}, _tasks_map{}
+{
+ assert(src_tensors.size() == dst_tensors.size());
+ _src_tensors = src_tensors;
+ _dst_tensors = dst_tensors;
+ _src_tensors_offsets.resize(src_tensors.size());
+ _dst_tensors_offsets.resize(dst_tensors.size());
+}
+
+void PermuteLayer::optimize()
+{
+  // Drop pairs that need no copy (same tensor) or where either tensor is nullptr
+ auto src_it = _src_tensors.begin();
+ auto dst_it = _dst_tensors.begin();
+ auto src_offsets_it = _src_tensors_offsets.begin();
+ auto dst_offsets_it = _dst_tensors_offsets.begin();
+ while (src_it != _src_tensors.end())
+ {
+ if ((*src_it == *dst_it) || (*src_it == nullptr || *dst_it == nullptr))
+ {
+ src_it = _src_tensors.erase(src_it);
+ dst_it = _dst_tensors.erase(dst_it);
+ src_offsets_it = _src_tensors_offsets.erase(src_offsets_it);
+ dst_offsets_it = _dst_tensors_offsets.erase(dst_offsets_it);
+ }
+ else
+ {
+ auto src = *src_it;
+ auto dst = *dst_it;
+ src_offsets_it->resize(0);
+ dst_offsets_it->resize(0);
+ if (underlying_type(src->data_type()) != underlying_type(dst->data_type()))
+ continue;
+ const auto permute_type = [&]() -> PermuteType {
+ if (src->getShape().rank() == 4 && src->layout() == ir::Layout::NHWC &&
+ dst->layout() == ir::Layout::NCHW)
+ {
+ return PermuteType::NHWC_TO_NCHW;
+ }
+ else if (src->getShape().rank() == 4 && src->layout() == ir::Layout::NCHW &&
+ dst->layout() == ir::Layout::NHWC)
+ {
+ return PermuteType::NCHW_TO_NHWC;
+ }
+ else
+ {
+ return PermuteType::COPY;
+ }
+ }();
+
+ // TODO Support different types
+ auto fn = [&](backend::ITensor &src_tensor) {
+ dst->access([&](backend::ITensor &dst_tensor) {
+          // NOTE The buffers of both tensors can still be nullptr at this step
+ const auto data_size = ir::sizeOfDataType(src_tensor.data_type());
+
+ if (permute_type == PermuteType::COPY)
+ {
+ if ((!src_tensor.has_padding() && !dst_tensor.has_padding()))
+ {
+ const auto num_elements = src_tensor.getShape().num_elements();
+ const int thread_count =
+ _external_context->ruy_context()->max_num_threads() < static_cast<int>(num_elements)
+ ? _external_context->ruy_context()->max_num_threads()
+ : num_elements;
+
+ std::vector<PermuteWorkerTask> tasks;
+ auto start = 0;
+ for (auto i = 0; i < thread_count; ++i)
+ {
+ int end = start + (num_elements - start) / (thread_count - i);
+ tasks.emplace_back(src_tensor.buffer(), dst_tensor.buffer(), start * data_size,
+ start * data_size, (end - start) * data_size);
+ start = end;
+ }
+ assert(tasks.size() >= 1);
+ _tasks_map[src] = std::move(tasks);
+ }
+ else
+ {
+ auto loop_shape = src_tensor.getShape();
+
+ auto copy_axis = loop_shape.rank() - 1;
+ copy_axis = copy_axis < 0 ? 1 : copy_axis;
+ const auto copy_len = loop_shape.dim(copy_axis) * data_size;
+ loop_shape.dim(copy_axis) = 1;
+
+ appendPermuteTasks(src, dst, loop_shape, copy_len);
+ }
+ }
+ else
+ {
+ assert(src_tensor.getShape().rank() == 4 &&
+ (permute_type == PermuteType::NHWC_TO_NCHW ||
+ permute_type == PermuteType::NCHW_TO_NHWC));
+ const auto loop_shape = src_tensor.getShape();
+ const auto copy_len = data_size;
+
+ appendPermuteTasks(src, dst, loop_shape, copy_len);
+ }
+ });
+ };
+ src->access(fn);
+ src_it++;
+ dst_it++;
+ src_offsets_it++;
+ dst_offsets_it++;
+ }
+ }
+}
+
+void PermuteLayer::appendPermuteTasks(const ITensor *src_tensor, ITensor *dst_tensor,
+ const ir::Shape &loop_shape, size_t size)
+{
+ size_t distributed_dim = 0;
+ auto src_shape = src_tensor->getShape();
+ if (src_tensor->layout() == dst_tensor->layout())
+ {
+ for (int i = 1; i < src_shape.rank() - 1; ++i)
+ {
+ distributed_dim = src_shape.dim(distributed_dim) < src_shape.dim(i) ? i : distributed_dim;
+ }
+ }
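+ // Work is distributed across threads along distributed_dim: the largest dimension among
+ // indices 1..rank-2 when the layouts match, otherwise dimension 0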
+ const auto distributed_dim_val = src_shape.dim(distributed_dim);
+ const int thread_count =
+ _external_context->ruy_context()->max_num_threads() < static_cast<int>(distributed_dim_val)
+ ? _external_context->ruy_context()->max_num_threads()
+ : distributed_dim_val;
+ // NOTE Do not remove this assertion. Requesting more tasks than max_num_threads would degrade
+ // performance by forcing new threads to be created in the context's thread pool
+ assert(thread_count <= _external_context->ruy_context()->max_num_threads());
+
+ std::vector<PermuteWorkerTask> tasks;
+ int start = 0;
+ auto one_thread_loop_shape = loop_shape;
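+ // Each task covers a slice of distributed_dim: start_coords marks where the slice begins and
+ // one_thread_loop_shape shrinks that dimension to the slice length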
+ for (auto i = 0; i < thread_count; ++i)
+ {
+ ir::Coordinates start_coords(one_thread_loop_shape.rank());
+ start_coords.set(distributed_dim, start);
+ int end = start + (distributed_dim_val - start) / (thread_count - i);
+ one_thread_loop_shape.dim(distributed_dim) = end - start;
+ tasks.emplace_back(*src_tensor, *dst_tensor, start_coords, one_thread_loop_shape, size);
+ start = end;
+ }
+ assert(tasks.size() >= 1);
+ _tasks_map[src_tensor] = std::move(tasks);
+}
+
+void PermuteLayer::runPermuteTasks(backend::ITensor *src, uint8_t *dst_buffer)
+{
+ assert(src->getShape().num_elements() * ir::sizeOfDataType(src->data_type()) <=
+ src->total_size());
+ std::vector<PermuteWorkerTask> &tasks = _tasks_map.at(src);
+ for (size_t i = 0; i < tasks.size(); ++i)
+ {
+ tasks.at(i).setBuffers(src->buffer(), dst_buffer);
+ }
+ assert(tasks.size() >= 1);
+ _external_context->ruy_context()->mutable_thread_pool()->Execute(tasks.size(), tasks.data());
+}
+
+void PermuteLayer::run()
+{
+ assert(_src_tensors.size() == _dst_tensors.size());
+ // PermuteLayer infers dynamic shapes inside itself whenever run() is called, for the following
+ // reasons:
+ // 1. PermuteLayer has to access the dynamic tensor manager for input/output tensors of other
+ // backends
+ // 2. Other control flow operations (If/While) use this layer for copying tensors of other
+ // subgraphs (with other backends)
+ // 3. Placing the inference code here avoids the duplication that reasons 1 and 2 would
+ // otherwise cause
+
+ // Infer and apply the output shape when either the source or destination tensor is dynamic
+ for (size_t i = 0; i < _src_tensors.size(); ++i)
+ {
+ auto dst_tensor = _dst_tensors.at(i);
+ auto src_tensor = _src_tensors.at(i);
+ if (src_tensor->is_dynamic() || dst_tensor->is_dynamic())
+ {
+ // getting output shape
+ auto src_shape = src_tensor->getShape();
+
+ // set output shape and output buffer
+ ir::Shape new_shape =
+ exec::convertShape(src_shape, src_tensor->layout(), dst_tensor->layout());
+
+ try
+ {
+ if (!dst_tensor->applyShape(new_shape))
+ throw std::runtime_error{
+ "Error: PermuteLayer: output's TensorManager does not support dynamic tensor"};
+ assert(dst_tensor->buffer() != nullptr);
+ }
+ catch (const std::out_of_range &e)
+ {
+ std::cerr << "Error: out_of_range in PermuteLayer: output's TensorManager does not support "
+ "dynamic tensor"
+ << '\n';
+ throw;
+ }
+ }
+ assert(exec::convertShape(src_tensor->getShape(), src_tensor->layout(), dst_tensor->layout()) ==
+ dst_tensor->getShape());
+ }
+ assert(_src_tensors.size() == _dst_tensors.size());
+ assert(_src_tensors.size() == _src_tensors_offsets.size());
+ assert(_dst_tensors.size() == _dst_tensors_offsets.size());
+ auto src_it = _src_tensors.begin();
+ auto dst_it = _dst_tensors.begin();
+ auto src_offsets_it = _src_tensors_offsets.begin();
+ auto dst_offsets_it = _dst_tensors_offsets.begin();
+ while (src_it != _src_tensors.end())
+ {
+ auto src = *src_it;
+ auto dst = *dst_it;
+ auto &src_offsets = *src_offsets_it;
+ auto &dst_offsets = *dst_offsets_it;
+
+ if (src->total_size() == 0)
+ {
+ assert(dst->total_size() == 0);
+ }
+ else
+ {
+ if (src != dst)
+ {
+ // Conditions for running the permutation with multithreading:
+ // 1. Tasks for multithreading were created in optimize()
+ // 2. There is more than one task
+ // 3. Neither tensor is dynamic
+ // 4. Both tensors have the same underlying data type
+ // Otherwise fall back to the element-wise permute() below
+ if (_tasks_map.find(src) == _tasks_map.end() || _tasks_map.at(src).size() == 1 ||
+ src->is_dynamic() || dst->is_dynamic() ||
+ underlying_type(src->data_type()) != underlying_type(dst->data_type()))
+ {
+ permute(src, dst, src->getShape().rank(), src_offsets, dst_offsets);
+ }
+ // If dst is a subtensor, we have to use clEnqueueMapBuffer instead of clEnqueueWriteBuffer
+ else if (dst->needMemoryMap() && !dst->is_subtensor())
+ {
+ if (!src->has_padding() && !dst->has_padding() && src->layout() == dst->layout())
+ {
+ // This is more effective than multi-threading
+ src->access([&](backend::ITensor &) { dst->enqueueWriteBuffer(src->buffer(), false); });
+ }
+ else
+ {
+ // TODO Optimize this block for the case where dst has a large padding size.
+ _buffers_map[dst].resize(dst->total_size()); // resize (not reserve) so writing via data() is valid
+ auto dst_buffer = _buffers_map[dst].data();
+
+ src->access([&](backend::ITensor &) { runPermuteTasks(src, dst_buffer); });
+ dst->enqueueWriteBuffer(dst_buffer, false);
+ }
+ }
+ else if (src->needMemoryMap() && !src->is_subtensor() && !src->has_padding() &&
+ !dst->has_padding() && src->layout() == dst->layout())
+ {
+ // This is more effective than multi-threading
+ assert(!dst->needMemoryMap());
+ dst->access([&](backend::ITensor &) { src->enqueueReadBuffer(dst->buffer(), true); });
+ }
+ else
+ {
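+ // Default case: both buffers are directly accessible, so run the pre-built tasks on the
+ // ruy thread pool, writing straight into dst's buffer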
+ auto fn = [&](backend::ITensor &) {
+ dst->access([&](backend::ITensor &) { runPermuteTasks(src, dst->buffer()); });
+ };
+ src->access(fn);
+ }
+ }
+ }
+ src_it++;
+ dst_it++;
+ src_offsets_it++;
+ dst_offsets_it++;
+ }
+}
+
+} // namespace kernel
+} // namespace builtin
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h
new file mode 100644
index 000000000..227e32434
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_KERNEL_PERMUTELAYER_H__
+#define __ONERT_BACKEND_BUILTIN_KERNEL_PERMUTELAYER_H__
+
+#include "../ExternalContext.h"
+#include "../../../exec/IPermuteFunction.h"
+
+#include <ruy/thread_pool.h> // from @ruy
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace kernel
+{
+
+class PermuteLayer : public onert::exec::IPermuteFunction
+{
+public:
+ PermuteLayer(const std::vector<ITensor *> &src_tensors, const std::vector<ITensor *> &dst_tensors,
+ const std::shared_ptr<ExternalContext> &external_context);
+
+ void optimize() override;
+
+ void run() override;
+
+private:
+ std::shared_ptr<ExternalContext> _external_context;
+
+private:
+ void appendPermuteTasks(const ITensor *src_tensor, ITensor *dst_tensor,
+ const ir::Shape &loop_shape, size_t size);
+
+ void runPermuteTasks(backend::ITensor *src, uint8_t *dst_buffer);
+
+ struct PermuteWorkerTask : ruy::Task
+ {
+ using Strides = ir::Coordinates;
+
+ PermuteWorkerTask(const ITensor &src_tensor, ITensor &dst_tensor,
+ const ir::Coordinates &start_coords, const ir::Shape &loop_shape, size_t size)
+ : _src_buffer{src_tensor.buffer()}, _dst_buffer{dst_tensor.buffer()},
+ _src_start_offset{src_tensor.calcOffset(start_coords)},
+ _dst_start_offset{dst_tensor.calcOffset(start_coords)}, _src_strides{}, _dst_strides{},
+ _loop_shape{loop_shape}, _size{size}, _src_layout{src_tensor.layout()},
+ _dst_layout{dst_tensor.layout()}, _is_permutation{true}
+ {
+ // Set strides
+ setStrides(src_tensor, &_src_strides);
+ setStrides(dst_tensor, &_dst_strides);
+
+ _is_permutation = (_src_layout != _dst_layout && loop_shape.rank() == 4);
+ }
+ // Constructor for a copy
+ PermuteWorkerTask(const uint8_t *src_buffer, uint8_t *dst_buffer, uint32_t src_start_offset,
+ uint32_t dst_start_offset, size_t size)
+ : _src_buffer{src_buffer}, _dst_buffer{dst_buffer}, _src_start_offset{src_start_offset},
+ _dst_start_offset{dst_start_offset}, _src_strides{0}, _dst_strides{0},
+ _loop_shape{1}, _size{size}, _src_layout{}, _dst_layout{}, _is_permutation{false}
+ {
+ // DO NOTHING
+ }
+ void setBuffers(const uint8_t *src_buffer, uint8_t *dst_buffer)
+ {
+ _src_buffer = src_buffer;
+ _dst_buffer = dst_buffer;
+ }
+ void Run() override
+ {
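+ // Visit every coordinate of _loop_shape, convert it to destination-layout coordinates when
+ // this task is a layout permutation, and copy _size bytes per coordinate via the precomputed strides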
+ ShapeLoop(_loop_shape, [&](const onert::ir::Coordinates &coords) {
+ size_t src_offset = _src_start_offset;
+ size_t dst_offset = _dst_start_offset;
+ assert(static_cast<size_t>(_loop_shape.rank()) == coords.size());
+ ir::Coordinates dst_coords = coords;
+ if (_is_permutation)
+ {
+ dst_coords = ir::convertCoordinates(coords, _src_layout, _dst_layout);
+ }
+ for (auto i = 0; i < _loop_shape.rank(); ++i)
+ {
+ assert(coords[i] >= 0 && dst_coords[i] >= 0);
+ src_offset += coords[i] * _src_strides[i];
+ dst_offset += dst_coords[i] * _dst_strides[i];
+ }
+ memcpy(_dst_buffer + dst_offset, _src_buffer + src_offset, _size);
+ });
+ }
+
+ private:
+ void setStrides(const ITensor &tensor, Strides *strides)
+ {
+ auto shape = tensor.getShape();
+ const size_t rank = shape.rank();
+ for (size_t i = 0; i < rank; ++i)
+ {
+ ir::Coordinates no_step(rank), one_step(rank);
+ one_step.set(i, 1);
+ if (shape.dim(i) > 1)
+ {
+ strides->set(i, tensor.calcOffset(one_step) - tensor.calcOffset(no_step));
+ }
+ else
+ {
+ // If the dimension value is 0 or 1, the stride of that dimension is not used;
+ // do not call calcOffset() with a coordinate value greater than the dimension value
+ strides->set(i, 0);
+ }
+ assert((*strides)[i] >= 0);
+ }
+ }
+
+ private:
+ const uint8_t *_src_buffer;
+ uint8_t *_dst_buffer;
+ size_t _src_start_offset;
+ size_t _dst_start_offset;
+ Strides _src_strides;
+ Strides _dst_strides;
+ const ir::Shape _loop_shape;
+ const size_t _size;
+ const ir::Layout _src_layout;
+ const ir::Layout _dst_layout;
+ bool _is_permutation;
+ };
+ std::unordered_map<const ITensor *, std::vector<PermuteWorkerTask>> _tasks_map;
+};
+
+} // namespace kernel
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_KERNEL_PERMUTELAYER_H__
diff --git a/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc
new file mode 100644
index 000000000..8b00db468
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "WhileLayer.h"
+
+#include "PermuteLayer.h"
+#include "../../../exec/ExecutorBase.h"
+
+#include <misc/polymorphic_downcast.h>
+
+#include <algorithm>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace kernel
+{
+
+WhileLayer::WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors,
+ const std::vector<backend::IPortableTensor *> output_tensors,
+ const ir::SubgraphIndex &cond_subg_index,
+ const ir::SubgraphIndex &body_subg_index, exec::IExecutors *executors,
+ const ir::ModelIndex &model_index,
+ basic::DynamicMemoryManager *dyn_memory_manager,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : _cond_subg_index{cond_subg_index}, _body_subg_index{body_subg_index},
+ _input_tensors{input_tensors}, _output_tensors{output_tensors}, _executors{executors},
+ _model_index{model_index}, _dyn_memory_manager{dyn_memory_manager}, _external_context{
+ external_context}
+{
+ // At this point, `executors` may not yet contain the executors for the cond and body subgraphs
+}
+
+void WhileLayer::run()
+{
+ // Copy "_input_tensors" -> "cond subg inputs"
+ // Run cond subg
+ // Loop while the output of cond subg is true
+ // // Copy "_input_tensors" -> "body subg inputs" in the first iteration, then copy "body subg
+ // outputs" -> "body subg inputs" in subsequent iterations
+ // // Run body subg
+ // // Copy "body subg outputs" -> "cond subg inputs"
+ // // Run cond subg
+ // If there is no iteration, copy "_input_tensors" -> "_output_tensors"; otherwise copy
+ // "cond subg inputs" -> "_output_tensors"
+ auto cond_exec = _executors->at(_model_index, _cond_subg_index);
+ auto body_exec = _executors->at(_model_index, _body_subg_index);
+
+ // Need a temp tensor to hold the cond subgraph output
+ assert(cond_exec->getOutputTensors().size() == 1);
+ auto cond_output_tensor = [&]() {
+ auto cond_output = cond_exec->getOutputTensors().at(0);
+ auto tensor = std::make_unique<Tensor>(cond_output->orig_info(), cond_output->orig_layout(),
+ _dyn_memory_manager);
+ tensor->set_dynamic();
+ tensor->setBuffer(_dyn_memory_manager->allocate(tensor.get(), tensor->total_size()));
+ return tensor;
+ }();
+
+ VERBOSE(While) << "Call to $" << _cond_subg_index << " (cond)" << std::endl;
+ cond_exec->execute(_input_tensors, {cond_output_tensor.get()});
+ VERBOSE(While) << "Return from $" << _cond_subg_index << std::endl;
+
+ auto getResultCond = [](backend::ITensor *tensor) -> bool {
+ bool ret = false;
+ tensor->access([&](ITensor &tensor) { ret = *reinterpret_cast<bool *>(tensor.buffer()); });
+ return ret;
+ };
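+ // The cond subgraph produces a single boolean scalar; read it through access() so the buffer
+ // is valid to dereference regardless of which backend owns it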
+
+ std::vector<ITensor *> op_inputs(_input_tensors.begin(), _input_tensors.end());
+ std::vector<ITensor *> op_outputs(_output_tensors.begin(), _output_tensors.end());
+ // When the loop body is never executed, copy the op inputs directly to the op outputs
+ if (!getResultCond(cond_output_tensor.get()))
+ {
+ PermuteLayer copy_body_inputs_to_op_outputs{op_inputs, op_outputs, _external_context};
+ copy_body_inputs_to_op_outputs.run();
+ return;
+ }
+
+ // Need some temp tensors to hold the body subgraph outputs
+ std::vector<std::unique_ptr<Tensor>> temp_outputs_o;
+ std::vector<IPortableTensor *> temp_outputs;
+ for (auto &&io_tensor : body_exec->getOutputTensors())
+ {
+ auto tensor = std::make_unique<Tensor>(io_tensor->orig_info(), io_tensor->orig_layout(),
+ _dyn_memory_manager);
+ tensor->set_dynamic();
+ tensor->setBuffer(_dyn_memory_manager->allocate(tensor.get(), tensor->total_size()));
+ temp_outputs.push_back(tensor.get());
+ temp_outputs_o.push_back(std::move(tensor));
+ }
+
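+ // Each iteration runs the body into the temp tensors, copies them into the op outputs, and
+ // then re-evaluates the condition on those op outputs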
+ std::vector<ITensor *> body_outputs(temp_outputs.begin(), temp_outputs.end());
+ PermuteLayer copy_body_outputs_to_op_outputs{body_outputs, op_outputs, _external_context};
+
+ const auto body_execute_with_op_inputs = [&]() {
+ VERBOSE(While) << "Call to $" << _body_subg_index << " (body)" << std::endl;
+ body_exec->execute(_input_tensors, temp_outputs);
+ VERBOSE(While) << "Return from $" << _body_subg_index << std::endl;
+ };
+
+ const auto body_execute_with_body_outputs = [&]() {
+ VERBOSE(While) << "Call to $" << _body_subg_index << " (body)" << std::endl;
+ body_exec->execute(_output_tensors, temp_outputs);
+ VERBOSE(While) << "Return from $" << _body_subg_index << std::endl;
+ };
+
+ std::function<void()> body_execute = body_execute_with_op_inputs;
+ const auto cond_execute = [&]() {
+ VERBOSE(While) << "Call to $" << _cond_subg_index << " (cond)" << std::endl;
+ cond_exec->execute(_output_tensors, {cond_output_tensor.get()});
+ VERBOSE(While) << "Return from $" << _cond_subg_index << std::endl;
+ };
+
+ // Loop while Cond subgraph's output is true
+ while (getResultCond(cond_output_tensor.get()))
+ {
+ body_execute();
+ copy_body_outputs_to_op_outputs.run();
+ cond_execute();
+ body_execute = body_execute_with_body_outputs;
+ }
+
+ // Clean-up the temp tensors
+ _dyn_memory_manager->deallocate(cond_output_tensor.get());
+ for (auto &&tensor : temp_outputs)
+ {
+ _dyn_memory_manager->deallocate(tensor);
+ }
+}
+
+} // namespace kernel
+} // namespace builtin
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.h b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h
index ebca8acdc..40ca4fe23 100644
--- a/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.h
+++ b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h
@@ -14,20 +14,23 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_WHILE_LAYER_H__
-#define __ONERT_BACKEND_CONTROLFLOW_KERNEL_WHILE_LAYER_H__
+#ifndef __ONERT_BACKEND_BUILTIN_KERNEL_WHILE_LAYER_H__
+#define __ONERT_BACKEND_BUILTIN_KERNEL_WHILE_LAYER_H__
-#include <backend/ITensor.h>
-#include <exec/IExecutor.h>
+#include <backend/IPortableTensor.h>
+#include <exec/IExecutors.h>
#include <exec/IFunction.h>
#include <ir/OperandIndexSequence.h>
#include <ir/Graph.h>
+#include "../ExternalContext.h"
+
+#include "backend/basic/MemoryManager.h"
namespace onert
{
namespace backend
{
-namespace controlflow
+namespace builtin
{
namespace kernel
{
@@ -35,12 +38,12 @@ namespace kernel
class WhileLayer : public ::onert::exec::IFunction
{
public:
- WhileLayer(const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
- const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const ir::OperandIndexSequence &output_indices, const ir::Graph &graph,
- const exec::DynAllocInfoMap &outputs_dyn_alloc_info,
+ WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors,
+ const std::vector<backend::IPortableTensor *> output_tensors,
const ir::SubgraphIndex &cond_subg_index, const ir::SubgraphIndex &body_subg_index,
- exec::ExecutorMap *executor_map);
+ exec::IExecutors *executors, const ir::ModelIndex &model_index,
+ basic::DynamicMemoryManager *dyn_memory_manager,
+ const std::shared_ptr<ExternalContext> &external_context);
public:
void run() override;
@@ -48,17 +51,17 @@ public:
private:
const ir::SubgraphIndex _cond_subg_index;
const ir::SubgraphIndex _body_subg_index;
- const ir::OperandIndexSequence &_output_indices;
- const ir::Graph &_graph;
- const std::vector<std::shared_ptr<backend::ITensor>> _input_tensors;
- const std::vector<std::shared_ptr<backend::ITensor>> _output_tensors;
- const exec::DynAllocInfoMap _outputs_dyn_alloc_info;
- exec::ExecutorMap *_executor_map;
+ const std::vector<backend::IPortableTensor *> _input_tensors;
+ const std::vector<backend::IPortableTensor *> _output_tensors;
+ exec::IExecutors *_executors;
+ const ir::ModelIndex _model_index;
+ basic::DynamicMemoryManager *_dyn_memory_manager; // For generating temp tensors
+ const std::shared_ptr<ExternalContext> _external_context;
};
} // namespace kernel
-} // namespace controlflow
+} // namespace builtin
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CONTROLFLOW_KERNEL_WHILE_LAYER_H__
+#endif // __ONERT_BACKEND_BUILTIN_KERNEL_WHILE_LAYER_H__
diff --git a/runtime/onert/core/src/backend/builtin/train/BackendContext.cc b/runtime/onert/core/src/backend/builtin/train/BackendContext.cc
new file mode 100644
index 000000000..fa9131f4d
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/train/BackendContext.cc
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "backend/basic/train/TrainableBackendContextHelpers.h"
+#include "exec/FunctionSequence.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace train
+{
+
+backend::ITensorRegistry *BackendContext::genTensors()
+{
+ // For now, there is no need to generate tensors for forwarding.
+ // The builtin train backend handles three operators: `Permute`, `If`, and `While`.
+ // `Permute`: Tensor generation is not required.
+ // `If`, `While`: Not supported yet
+ return tensor_registry().get();
+}
+
+backend::train::ITensorRegistry *BackendContext::genTrainingTensors()
+{
+ // For now, there is no need to generate tensors for backwarding.
+ return tensor_registry().get();
+}
+
+backend::train::FunctionMap BackendContext::genKernels()
+{
+ backend::train::FunctionMap ret;
+
+ for (auto &&op_ind : _tdata->op_order)
+ {
+ auto tn_seq = kernel_gen->generate(op_ind);
+ ret.emplace_back(op_ind, std::move(tn_seq));
+ }
+
+ trainable_graph()->operands().iterate(
+ [&](const ir::OperandIndex &ind, const ir::Operand &operand) {
+ if (!external_operands().contains(ind) && operand.isConstant())
+ {
+ throw std::runtime_error(
+ "BackendContext: builtin backend does not support updatable weights yet");
+ }
+ });
+
+ // TODO Enable prepare()
+ // for (auto &&it : ret)
+ // {
+ // auto &fn_seq = it.second;
+ // fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
+ // }
+
+ return ret;
+}
+
+} // namespace train
+} // namespace builtin
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/core/src/backend/builtin/train/BackendContext.h b/runtime/onert/core/src/backend/builtin/train/BackendContext.h
new file mode 100644
index 000000000..6f8ce4cae
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/train/BackendContext.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_TRAIN_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_BUILTIN_TRAIN_BACKEND_CONTEXT_H__
+
+#include <backend/train/TrainableBackendContext.h>
+
+#include "KernelGenerator.h"
+#include "../ExternalContext.h"
+#include "../TensorBuilder.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace train
+{
+
+class BackendContext : public backend::train::TrainableBackendContext
+{
+public:
+ BackendContext(const backend::train::ITrainableBackend *backend,
+ std::unique_ptr<backend::train::TrainableContextData> &&data,
+ std::shared_ptr<backend::train::ITensorRegistry> tensor_registry = nullptr,
+ std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+ : backend::train::TrainableBackendContext(backend, std::move(data), tensor_registry),
+ kernel_gen{kernel_gen},
+ _external_context(new ExternalContext), _tensor_builder{tensor_builder}
+ {
+ }
+
+ backend::ITensorRegistry *genTensors() override;
+ backend::train::ITensorRegistry *genTrainingTensors() override;
+
+public:
+ backend::train::FunctionMap genKernels() override;
+
+ std::shared_ptr<ExternalContext> external_context() { return _external_context; }
+
+public:
+ // TODO Make it private
+ std::shared_ptr<KernelGenerator> kernel_gen;
+
+private:
+ // NOTE The ruy context owns a thread pool, so creating multiple ruy contexts also creates
+ // duplicate thread pools
+ // TODO Create one ruy context per session
+ std::shared_ptr<ExternalContext> _external_context;
+
+private:
+ std::shared_ptr<TensorBuilder> _tensor_builder;
+};
+
+} // namespace train
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_TRAIN_BACKEND_CONTEXT_H__
diff --git a/runtime/onert/core/src/backend/builtin/train/KernelGenerator.cc b/runtime/onert/core/src/backend/builtin/train/KernelGenerator.cc
new file mode 100644
index 000000000..6f2c0a3b9
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/train/KernelGenerator.cc
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "KernelGenerator.h"
+
+#include "kernel/PermuteLayer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace train
+{
+
+KernelGenerator::KernelGenerator(const ir::train::TrainableGraph &tgraph,
+ const std::shared_ptr<TensorRegistry> &tensor_reg,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : KernelGeneratorBase{tgraph}, _tensor_reg{tensor_reg}, _external_context(external_context)
+{
+}
+
+std::unique_ptr<exec::train::TrainableFnSequence> KernelGenerator::generate(ir::OperationIndex ind)
+{
+ auto ret = std::make_unique<exec::train::TrainableFnSequence>();
+ const auto &op = _tgraph.operation(ind);
+ op.accept(*this);
+ // _return_fn must have been generated
+ if (_return_fn == nullptr)
+ {
+ throw std::runtime_error(op.name() + " op is not supported as a trainable kernel yet");
+ }
+
+ ret->_functions.emplace_back(std::move(_return_fn));
+
+ return ret;
+}
+
+void KernelGenerator::visit(const ir::train::operation::Permute &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(0)};
+
+ // Add PermuteLayer
+ std::vector<ITensor *> output_tensors{getTensor(output_index)};
+ std::vector<ITensor *> input_tensors{getTensor(input_index)};
+
+ std::vector<ITensor *> output_deriv_tensors;
+ std::vector<ITensor *> input_deriv_tensors;
+
+ auto input_deriv_tensor = getDerivativeTensor(input_index);
+ auto output_deriv_tensor = getDerivativeTensor(output_index);
+ output_deriv_tensors.emplace_back(output_deriv_tensor);
+ input_deriv_tensors.emplace_back(input_deriv_tensor);
+
+ // NOTE IOTensors of graph outputs for passing data to users must be ignored in training
+ // because the buffers of those IOTensors are unnecessary and nullptr
+ bool ignore_forward_in_training = _whole_graph_outputs.contains(output_index);
+ auto fn = std::make_unique<kernel::PermuteLayer>(input_tensors, output_tensors,
+ input_deriv_tensors, output_deriv_tensors,
+ ignore_forward_in_training, _external_context);
+
+ _return_fn = std::move(fn);
+}
+
+backend::ITensor *KernelGenerator::getTensor(const ir::OperandIndex &index)
+{
+ // Get Tensor from all tensor registries (for Permute op)
+ auto ret = _tensor_registries.getITensor(index);
+ assert(ret != nullptr);
+ return ret;
+}
+
+backend::ITensor *KernelGenerator::getDerivativeTensor(const ir::OperandIndex &index)
+{
+ // Get derivative Tensor from all tensor registries (for Permute op)
+ auto ret = _tensor_registries.getDerivativeITensor(index);
+ return ret;
+}
+
+} // namespace train
+} // namespace builtin
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/core/src/backend/builtin/train/KernelGenerator.h b/runtime/onert/core/src/backend/builtin/train/KernelGenerator.h
new file mode 100644
index 000000000..d8781c0d0
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/train/KernelGenerator.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_TRAIN_KERNEL_GENERATOR_H__
+#define __ONERT_BACKEND_BUILTIN_TRAIN_KERNEL_GENERATOR_H__
+
+#include "../ExternalContext.h"
+#include "../train/TensorRegistry.h"
+#include "../../../compiler/train/TensorRegistries.h"
+
+#include <backend/train/KernelGeneratorBase.h>
+#include <exec/train/TrainableFnSequence.h>
+#include <ir/train/TrainableGraph.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace train
+{
+
+class KernelGenerator : public backend::train::KernelGeneratorBase
+{
+public:
+ KernelGenerator(const ir::train::TrainableGraph &tgraph,
+ const std::shared_ptr<TensorRegistry> &tensor_reg,
+ const std::shared_ptr<ExternalContext> &external_context);
+
+ std::unique_ptr<exec::train::TrainableFnSequence> generate(ir::OperationIndex ind) override;
+
+ void setTensorRegistries(const compiler::train::TensorRegistries &tensor_registries)
+ {
+ _tensor_registries = tensor_registries;
+ }
+
+ void setWholeGraphOutputs(const ir::OperandIndexSequence &outputs)
+ {
+ _whole_graph_outputs = outputs;
+ }
+
+private:
+ void visit(const ir::train::operation::Permute &) override;
+
+private:
+ backend::ITensor *getTensor(const ir::OperandIndex &index);
+ backend::ITensor *getDerivativeTensor(const ir::OperandIndex &index);
+
+private:
+ std::shared_ptr<TensorRegistry> _tensor_reg;
+ compiler::train::TensorRegistries _tensor_registries;
+ const std::shared_ptr<ExternalContext> _external_context;
+ ir::OperandIndexSequence _whole_graph_outputs;
+};
+
+} // namespace train
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_TRAIN_KERNEL_GENERATOR_H__
diff --git a/runtime/onert/core/src/backend/builtin/train/Tensor.h b/runtime/onert/core/src/backend/builtin/train/Tensor.h
new file mode 100644
index 000000000..611407bd2
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/train/Tensor.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_TRAIN_TRAINABLE_TENSOR_H__
+#define __ONERT_BACKEND_BUILTIN_TRAIN_TRAINABLE_TENSOR_H__
+
+#include <backend/basic/train/TrainableTensor.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace train
+{
+
+using TrainableTensor = basic::train::TrainableTensor;
+using DerivativeTensor = basic::Tensor;
+using GradientTensor = basic::Tensor;
+
+} // namespace train
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_TRAIN_TRAINABLE_TENSOR_H__
diff --git a/runtime/onert/core/src/backend/builtin/train/TensorRegistry.h b/runtime/onert/core/src/backend/builtin/train/TensorRegistry.h
new file mode 100644
index 000000000..c48e5fe93
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/train/TensorRegistry.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_TRAIN_TENSOR_REGISTRY_H__
+#define __ONERT_BACKEND_BUILTIN_TRAIN_TENSOR_REGISTRY_H__
+
+#include <backend/train/ITensorRegistry.h>
+
+#include "../IOTensor.h"
+#include "../Tensor.h"
+#include "Tensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace train
+{
+
+using BaseTensorRegistry =
+ backend::train::PortableTensorRegistryTemplate<Tensor, TrainableTensor, DerivativeTensor,
+ GradientTensor>;
+
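+/**
+ * @brief Tensor registry class for the builtin train backend
+ *
+ * Lookups first try the base registry (own, trainable, derivative, and gradient tensors) and
+ * then fall back to the natively managed IOTensors that wrap user-provided input/output buffers.
+ */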
+class TensorRegistry : public backend::train::ITensorRegistry
+{
+public:
+ TensorRegistry() : _base_reg{new BaseTensorRegistry} {}
+
+ ITensor *getITensor(const ir::OperandIndex &index) override
+ {
+ auto base_tensor = _base_reg->getITensor(index);
+ if (base_tensor)
+ return base_tensor;
+ return getNativeIOTensor(index);
+ }
+
+ ITensor *getNativeITensor(const ir::OperandIndex &index) override
+ {
+ auto base_tensor = _base_reg->getNativeITensor(index);
+ if (base_tensor)
+ return base_tensor;
+ return getNativeIOTensor(index);
+ }
+
+ IPortableTensor *getPortableTensor(const ir::OperandIndex &index)
+ {
+ auto base_tensor = _base_reg->getPortableTensor(index);
+ if (base_tensor)
+ return base_tensor;
+ return getNativeIOTensor(index);
+ }
+
+ IOTensor *getNativeIOTensor(const ir::OperandIndex &index)
+ {
+ auto tensor = _native_io_tensors.find(index);
+ if (tensor != _native_io_tensors.end())
+ return tensor->second.get();
+ return nullptr;
+ }
+
+ ITensor *getDerivativeITensor(const ir::OperandIndex &index) override
+ {
+ return _base_reg->getDerivativeTensor(index);
+ }
+
+ ITensor *getGradientITensor(const ir::OperandIndex &index) override
+ {
+ return _base_reg->getGradientTensor(index);
+ }
+
+ DerivativeTensor *getDerivativeTensor(const ir::OperandIndex &index)
+ {
+ return _base_reg->getDerivativeTensor(index);
+ }
+
+ bool setMigrantTensor(const ir::OperandIndex &index, IPortableTensor *tensor) override
+ {
+ assert(tensor);
+ assert(!getITensor(index)); // For the index, tensor is not registered yet
+ _base_reg->setMigrantTensor(index, tensor);
+ return true;
+ }
+
+ void setDerivativeTensor(const ir::OperandIndex &index, std::unique_ptr<DerivativeTensor> tensor)
+ {
+ _base_reg->setDerivativeTensor(index, std::move(tensor));
+ }
+
+ void setGradientTensor(const ir::OperandIndex &index, std::unique_ptr<GradientTensor> tensor)
+ {
+ _base_reg->setGradientTensor(index, std::move(tensor));
+ }
+
+ void setNativeIOTensor(ir::OperandIndex index, std::unique_ptr<IOTensor> &&tensor)
+ {
+ assert(tensor);
+ assert(!getITensor(index)); // For the index, tensor is not registered yet
+ _native_io_tensors[index] = std::move(tensor);
+ }
+
+ const ir::OperandIndexMap<std::unique_ptr<IOTensor>> &native_io_tensors()
+ {
+ return _native_io_tensors;
+ }
+ std::shared_ptr<BaseTensorRegistry> base_reg() { return _base_reg; }
+
+private:
+ std::shared_ptr<BaseTensorRegistry> _base_reg;
+ ir::OperandIndexMap<std::unique_ptr<IOTensor>> _native_io_tensors;
+};
+
+} // namespace train
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_TRAIN_TENSOR_REGISTRY_H__
diff --git a/runtime/onert/core/src/backend/builtin/train/kernel/PermuteLayer.cc b/runtime/onert/core/src/backend/builtin/train/kernel/PermuteLayer.cc
new file mode 100644
index 000000000..929092dde
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/train/kernel/PermuteLayer.cc
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PermuteLayer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace train
+{
+namespace kernel
+{
+
+PermuteLayer::PermuteLayer(const std::vector<ITensor *> &src_tensors,
+ const std::vector<ITensor *> &dst_tensors,
+ const std::vector<ITensor *> &input_deriv_tensors,
+ const std::vector<ITensor *> &output_deriv_tensors,
+ bool ignore_forward_in_training,
+ const std::shared_ptr<ExternalContext> &external_context)
+ : builtin::kernel::PermuteLayer{src_tensors, dst_tensors, external_context},
+ _input_deriv_tensors{input_deriv_tensors}, _output_deriv_tensors{output_deriv_tensors},
+ _ignore_forward_in_training{ignore_forward_in_training}
+{
+ assert(input_deriv_tensors.size() == output_deriv_tensors.size());
+ assert(src_tensors.size() == dst_tensors.size());
+}
+
+void PermuteLayer::optimize()
+{
+ builtin::kernel::PermuteLayer::optimize();
+
+ // TODO Calculate offsets of derivative tensors if necessary
+}
+
+void PermuteLayer::forward(bool training)
+{
+ if (training && _ignore_forward_in_training)
+ return;
+
+ builtin::kernel::PermuteLayer::run();
+}
+
+void PermuteLayer::backward()
+{
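+ // Backward pass: permute/copy each output derivative tensor back into the corresponding
+ // input derivative tensor, i.e. the reverse direction of forward()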
+ for (uint32_t i = 0; i < _output_deriv_tensors.size(); ++i)
+ {
+ auto src_deriv = _output_deriv_tensors.at(i);
+ auto dst_deriv = _input_deriv_tensors.at(i);
+
+ // NOTE The derivative tensors corresponding to inputs/outputs of model are nullptr
+ // because permuting those tensors is meaningless
+ if (src_deriv && dst_deriv)
+ {
+ const auto rank = src_deriv->getShape().rank();
+ auto output_offsets = _dst_tensors_offsets.at(i);
+ auto input_offsets = _src_tensors_offsets.at(i);
+
+ exec::IPermuteFunction::permute(src_deriv, dst_deriv, rank, output_offsets, input_offsets);
+ }
+ }
+}
+
+} // namespace kernel
+} // namespace train
+} // namespace builtin
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/core/src/backend/builtin/train/kernel/PermuteLayer.h b/runtime/onert/core/src/backend/builtin/train/kernel/PermuteLayer.h
new file mode 100644
index 000000000..de8063a21
--- /dev/null
+++ b/runtime/onert/core/src/backend/builtin/train/kernel/PermuteLayer.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_BUILTIN_TRAIN_KERNEL_PERMUTELAYER_H__
+#define __ONERT_BACKEND_BUILTIN_TRAIN_KERNEL_PERMUTELAYER_H__
+
+#include "../../kernel/PermuteLayer.h"
+
+#include "exec/train/ITrainableFunction.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace builtin
+{
+namespace train
+{
+namespace kernel
+{
+
+class PermuteLayer : public builtin::kernel::PermuteLayer, public exec::train::ITrainableFunction
+{
+public:
+ PermuteLayer(const std::vector<ITensor *> &src_tensors, const std::vector<ITensor *> &dst_tensors,
+ const std::vector<ITensor *> &input_deriv_tensors,
+ const std::vector<ITensor *> &output_deriv_tensors, bool ignore_forward_in_training,
+ const std::shared_ptr<ExternalContext> &external_context);
+
+ void optimize() override;
+
+ void forward(bool training) override;
+ void backward() override;
+
+private:
+ std::vector<ITensor *> _input_deriv_tensors;
+ std::vector<ITensor *> _output_deriv_tensors;
+ bool _ignore_forward_in_training;
+};
+
+} // namespace kernel
+} // namespace train
+} // namespace builtin
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_BUILTIN_TRAIN_KERNEL_PERMUTELAYER_H__
diff --git a/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h b/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h
deleted file mode 100644
index e21a8f357..000000000
--- a/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__
-#define __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__
-
-#include "TensorRegistry.h"
-
-#include <backend/IConstantInitializer.h>
-#include <ir/Operands.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-class ConstantInitializer : public IConstantInitializer
-{
-public:
- ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<ITensorRegistry> &tensor_reg)
- : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
- {
- }
-
-private:
- std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; }
-
-private:
- std::shared_ptr<ITensorRegistry> _tensor_reg;
-};
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__
diff --git a/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc b/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc
deleted file mode 100644
index 1288e4c96..000000000
--- a/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "DynamicTensorManager.h"
-
-#include "util/logging.h"
-#include "util/Exceptions.h"
-#include "ir/DataType.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-DynamicTensorManager::DynamicTensorManager(const std::shared_ptr<TensorRegistry> &tensors)
- : _dynamic_mem_mgr{new cpu_common::DynamicMemoryManager()}, _tensors{tensors}
-{
- // DO NOTHING
-}
-
-void DynamicTensorManager::applyShape(const ir::OperandIndex &ind, const ir::Shape &new_shape)
-{
- // NOTE Handle user tensors first
- auto user_tensor = _tensors->getNativeUserTensor(ind);
- if (user_tensor)
- {
- // User tensors cannot be reallocated.
- auto buffer_size = user_tensor->total_size();
- auto new_size = new_shape.num_elements() * sizeOfDataType(user_tensor->data_type());
- if (buffer_size < new_size)
- throw InsufficientBufferSizeException{"Output buffer size is less than output tensor size"};
- user_tensor->setShape(new_shape);
- return;
- }
-
- // NOTE Then handle own tensors
- auto tensor = _tensors->getNativeOwnTensor(ind);
- assert(tensor);
-
- bool previously_dynamic = tensor->is_dynamic();
-
- auto allocTensorMem = [&](bool overwrite = false) {
- auto capacity = tensor->total_size();
- auto alloc = _dynamic_mem_mgr->allocate(ind, capacity);
-
- if (overwrite)
- tensor->overwriteBuffer(alloc);
- else
- tensor->setBuffer(alloc);
- };
-
- if (!previously_dynamic)
- {
- // TODO deallocate tensor->buffer()
- // issue is that staticTensorManager might have allocate this memory
- tensor->setShape(new_shape);
- tensor->set_dynamic();
- allocTensorMem(true);
- }
- else if (tensor->buffer() == nullptr)
- {
- tensor->setShape(new_shape);
- tensor->set_dynamic();
- allocTensorMem();
- }
- // when buffer was already allocated and new_shape requires different size
- else
- {
- auto previous_size = tensor->total_size();
- auto new_size = new_shape.num_elements() * sizeOfDataType(tensor->data_type());
- if (previous_size != new_size)
- {
- _dynamic_mem_mgr->deallocate(ind);
-
- tensor->setShape(new_shape);
- tensor->set_dynamic();
- allocTensorMem(true);
- }
- else
- { // when buffer with same size was already allocated, shape could differ
- tensor->setShape(new_shape);
- }
- }
-}
-
-void DynamicTensorManager::buildTensor(const ir::OperandIndex &ind,
- const ir::OperandInfo &tensor_info,
- ir::Layout backend_layout)
-{
- auto tensor = std::make_shared<cpu_common::Tensor>(tensor_info, backend_layout, this);
- _tensors->setNativeOwnTensor(ind, tensor);
-}
-
-void DynamicTensorManager::planDealloc(ir::OperationIndex op_ind, ir::OperandIndex operand_ind)
-{
- _dealloc_tensor_map[op_ind].emplace(operand_ind);
-}
-
-void DynamicTensorManager::deallocInput(ir::OperationIndex op_ind)
-{
- auto find = _dealloc_tensor_map.find(op_ind);
- if (find == _dealloc_tensor_map.end())
- return;
-
- auto &input_set = find->second;
- for (auto input_ind : input_set)
- {
- if (!_tensors->getNativeTensor(input_ind)->is_dynamic())
- continue;
-
- _dynamic_mem_mgr->deallocate(input_ind);
- VERBOSE(DynamicTensorManager) << "Deallocating #" << input_ind.value()
- << " (input of op_ind: " << op_ind.value() << ")" << std::endl;
- }
-}
-
-void DynamicTensorManager::deallocSubgraphOutput(ir::OperandIndex output_ind)
-{
- if (!_tensors->getNativeTensor(output_ind)->is_dynamic())
- return;
-
- _dynamic_mem_mgr->deallocate(output_ind);
- VERBOSE(DynamicTensorManager) << "Deallocating #" << output_ind.value()
- << " (output of a subgraph)" << std::endl;
-}
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h b/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h
deleted file mode 100644
index dbe388ba2..000000000
--- a/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_DYNAMICTENSOR_MANAGER_H__
-#define __ONERT_BACKEND_CONTROLFLOW_DYNAMICTENSOR_MANAGER_H__
-
-#include "TensorRegistry.h"
-#include "Tensor.h"
-
-#include <backend/IDynamicTensorManager.h>
-#include <backend/cpu_common/MemoryManager.h>
-#include <ir/OperandInfo.h>
-#include <ir/Operation.h>
-#include <ir/Index.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-/**
- * @brief Class to manage dynamic tensor and its memory
- */
-class DynamicTensorManager : public backend::IDynamicTensorManager
-{
-public:
- DynamicTensorManager(const std::shared_ptr<TensorRegistry> &tensors);
-
- virtual ~DynamicTensorManager() = default;
-
- void applyShape(const ir::OperandIndex &ind, const ir::Shape &new_shape) override;
-
- void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info,
- ir::Layout backend_layout);
-
- void planDealloc(ir::OperationIndex op_ind, ir::OperandIndex operand_ind) override;
- void deallocInput(ir::OperationIndex op_ind) override;
- void deallocSubgraphOutput(ir::OperandIndex ind) override;
-
-private:
- /**
- * @brief Memory manager for dynamic tensor.
- * @todo DynamicMemoryManager is not optimized. Optimized one is needed
- */
- std::shared_ptr<cpu_common::DynamicMemoryManager> _dynamic_mem_mgr;
- const std::shared_ptr<TensorRegistry> _tensors;
-
- // contains list of dynamic tensor index, which can be deallocated after running operation
- // note: this map could contain static tensor index too. Careful use is required.
- std::unordered_map<ir::OperationIndex, std::unordered_set<ir::OperandIndex>> _dealloc_tensor_map;
-};
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_DYNAMICTENSOR_MANAGER_H__
diff --git a/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc b/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc
deleted file mode 100644
index de5a6a5f6..000000000
--- a/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "KernelGenerator.h"
-
-#include <backend/BackendContext.h>
-#include <util/Utils.h>
-#include "kernel/IfLayer.h"
-#include "kernel/WhileLayer.h"
-#include "kernel/PermuteLayer.h"
-#include "exec/ExecutorBase.h"
-#include "exec/FunctionSequence.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-KernelGenerator::KernelGenerator(const ir::Graph &graph, IDynamicTensorManager *dyn_tensor_manager,
- const std::shared_ptr<TensorRegistry> &tensor_reg)
- : _graph{graph}, _dyn_tensor_manager{dyn_tensor_manager}, _tensor_reg{tensor_reg},
- _tensor_registries{}, _executor_map{nullptr}
-{
- UNUSED_RELEASE(_graph);
- UNUSED_RELEASE(_tensor_registries);
- UNUSED_RELEASE(_executor_map);
-}
-
-void KernelGenerator::visit(const ir::OpSequence &op_seq)
-{
- assert(!_return_fn_seq);
- assert(_dyn_tensor_manager);
- assert(_tensor_reg);
-
- auto dyn_shape_inferer =
- std::make_unique<exec::DynamicShapeInferer>(_graph.operands(), _tensor_reg);
-
- _return_fn_seq = std::make_unique<exec::FunctionSequence>();
-
- // Prepare to handle dynamic tensors later
- auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
- {
- dyn_ctx->op_seq = &op_seq;
- dyn_ctx->operations = &_graph.operations();
- dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
- dyn_ctx->tensor_registry = _tensor_reg;
- dyn_ctx->dynamic_tensor_manager = _dyn_tensor_manager;
-
- _return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
- }
- _return_fn_seq->enableDynamicShapeInferer(true);
-
- for (const auto &op_idx : op_seq.operations())
- {
- const auto &node = _graph.operations().at(op_idx);
- node.accept(*this);
- _return_fn_seq->append(releaseFunction());
- }
-}
-
-void KernelGenerator::visit(const ir::operation::If &node)
-{
- const auto then_subg_index = node.param().then_subg_index;
- const auto else_subg_index = node.param().else_subg_index;
-
- std::vector<std::shared_ptr<backend::ITensor>> input_tensors;
- for (const auto input_index : node.getInputs())
- {
- auto input_tensor = getTensor(input_index);
-
- input_tensors.emplace_back(input_tensor);
- }
-
- std::vector<std::shared_ptr<backend::ITensor>> output_tensors;
- exec::DynAllocInfoMap outputs_dyn_alloc_info;
- for (const auto output_index : node.getOutputs())
- {
- auto output_tensor = getTensor(output_index);
-
- output_tensors.emplace_back(output_tensor);
- outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index};
- }
-
- // IfLayer just set ExecutorMap instead of then and else executor to avoid complexity of
- // creating executor recusively
- const auto cond_tensor = input_tensors.front();
- input_tensors.erase(input_tensors.begin());
- auto fn = std::make_unique<::onert::backend::controlflow::kernel::IfLayer>(
- cond_tensor, input_tensors, output_tensors, node.getOutputs(), _graph, outputs_dyn_alloc_info,
- then_subg_index, else_subg_index, _executor_map);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Permute &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(0)};
-
- // Add PermuteLayer
- std::vector<std::shared_ptr<ITensor>> output_tensors{getTensor(output_index)};
- std::vector<std::shared_ptr<ITensor>> input_tensors{getTensor(input_index)};
- std::unordered_map<std::shared_ptr<ITensor>, exec::DynAllocInfo> outputs_dyn_alloc_info;
- outputs_dyn_alloc_info[output_tensors.at(0)] = exec::DynAllocInfo{output_index};
-
- auto fn =
- std::make_unique<kernel::PermuteLayer>(input_tensors, output_tensors, outputs_dyn_alloc_info);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::While &node)
-{
- const auto cond_subg_index = node.param().cond_subg_index;
- const auto body_subg_index = node.param().body_subg_index;
-
- // This op does not support input as a constant, because controlflow backend does not have
- // TensorBuilder
- std::vector<std::shared_ptr<backend::ITensor>> input_tensors;
- for (const auto input_index : node.getInputs())
- {
- auto input_tensor = getTensor(input_index);
-
- input_tensors.emplace_back(input_tensor);
- }
-
- std::vector<std::shared_ptr<backend::ITensor>> output_tensors;
- std::unordered_map<std::shared_ptr<ITensor>, exec::DynAllocInfo> outputs_dyn_alloc_info;
- for (const auto output_index : node.getOutputs())
- {
- auto output_tensor = getTensor(output_index);
-
- output_tensors.emplace_back(output_tensor);
-
- outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index};
- }
-
- // WhileLayer just set ExecutorMap instead of cond and body executor to avoid complexity of
- // creating executor recusively
- auto fn = std::make_unique<::onert::backend::controlflow::kernel::WhileLayer>(
- input_tensors, output_tensors, node.getOutputs(), _graph, outputs_dyn_alloc_info,
- cond_subg_index, body_subg_index, _executor_map);
-
- _return_fn = std::move(fn);
-}
-
-std::shared_ptr<backend::ITensor> KernelGenerator::getTensor(const ir::OperandIndex &index)
-{
- std::shared_ptr<backend::ITensor> ret = _tensor_registries.getITensor(index);
- assert(ret != nullptr);
- return ret;
-}
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/core/src/backend/controlflow/TensorRegistry.h b/runtime/onert/core/src/backend/controlflow/TensorRegistry.h
deleted file mode 100644
index 678c5b73b..000000000
--- a/runtime/onert/core/src/backend/controlflow/TensorRegistry.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_REGISTRY_H__
-#define __ONERT_BACKEND_CONTROLFLOW_TENSOR_REGISTRY_H__
-
-#include "backend/cpu_common/TensorRegistry.h"
-#include "backend/ITensorRegistry.h"
-#include "Tensor.h"
-#include "UserTensor.h"
-#include <assert.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-
-/**
- * @brief Tensor registry class for controlflow backend
- *
- * This class contains three types of tensors: two kinds of native tensors (tensors that are
- * managed by this backend) and migrant tensors.
- *
- * - NativeUserTensor - @c UserTensor managed by this backend, buffer is user-given
- * - NativeOwnTensor - @c cpu_common::Tensor managed by this backend ( in @c _base_reg )
- * - MigrantTensor - @c IPortableTensor managed by other backends ( in @c _base_reg )
- *
- * @note @c _base_reg is used in implementation to reuse @c cpu_common::StaticTensorManager
- *
- */
-class TensorRegistry : public ITensorRegistry
-{
-public:
- TensorRegistry() : _base_reg{new cpu_common::TensorRegistry} {}
-
- std::shared_ptr<ITensor> getITensor(const ir::OperandIndex &ind) override
- {
- auto base_tensor = _base_reg->getITensor(ind);
- if (base_tensor)
- return base_tensor;
- return getNativeUserTensor(ind);
- }
-
- std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &ind) override
- {
- auto base_tensor = _base_reg->getNativeITensor(ind);
- if (base_tensor)
- return base_tensor;
- return getNativeUserTensor(ind);
- }
-
- std::shared_ptr<IPortableTensor> getPortableTensor(const ir::OperandIndex &ind)
- {
- auto base_tensor = _base_reg->getPortableTensor(ind);
- if (base_tensor)
- return base_tensor;
- return getNativeUserTensor(ind);
- }
-
- std::shared_ptr<IPortableTensor> getNativeTensor(const ir::OperandIndex &ind)
- {
- auto base_tensor = _base_reg->getNativeTensor(ind);
- if (base_tensor)
- return base_tensor;
- return getNativeUserTensor(ind);
- }
-
- std::shared_ptr<Tensor> getNativeOwnTensor(const ir::OperandIndex &ind)
- {
- return _base_reg->getNativeTensor(ind);
- }
-
- std::shared_ptr<UserTensor> getNativeUserTensor(const ir::OperandIndex &ind)
- {
- auto tensor = _native_user_tensors.find(ind);
- if (tensor != _native_user_tensors.end())
- return tensor->second;
- return nullptr;
- }
-
- bool setMigrantTensor(const ir::OperandIndex &ind,
- const std::shared_ptr<IPortableTensor> &tensor) override
- {
- assert(tensor);
- assert(!getITensor(ind)); // For the ind, tensor is not registered yet
- _base_reg->setMigrantTensor(ind, tensor);
- return true;
- }
-
- void setNativeOwnTensor(ir::OperandIndex ind, const std::shared_ptr<Tensor> &tensor)
- {
- assert(tensor);
- assert(!getITensor(ind)); // For the ind, tensor is not registered yet
- _base_reg->setNativeTensor(ind, tensor);
- }
-
- void setNativeUserTensor(ir::OperandIndex ind, const std::shared_ptr<UserTensor> &tensor)
- {
- assert(tensor);
- assert(!getITensor(ind)); // For the ind, tensor is not registered yet
- _native_user_tensors[ind] = tensor;
- }
-
- const ir::OperandIndexMap<std::shared_ptr<UserTensor>> &native_user_tensors()
- {
- return _native_user_tensors;
- }
- std::shared_ptr<cpu_common::TensorRegistry> base_reg() { return _base_reg; }
-
-private:
- std::shared_ptr<cpu_common::TensorRegistry> _base_reg;
- ir::OperandIndexMap<std::shared_ptr<UserTensor>> _native_user_tensors;
-};
-
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_REGISTRY_H__
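The registry above resolves an operand index by falling through from the shared base registry to the backend-local user tensors; its four getters differ only in which base-registry lookup they try first. A minimal sketch of that fall-through pattern, with Key and TensorPtr standing in for the real onert index and shared-pointer tensor types:

#include <unordered_map>

// Try the shared base registry first, then the registry-local user tensors,
// and report "not registered" with a null pointer. Key/TensorPtr are
// placeholders for the real onert index and tensor-pointer types.
template <typename Key, typename TensorPtr>
TensorPtr lookupWithFallback(const std::unordered_map<Key, TensorPtr> &base_reg,
                             const std::unordered_map<Key, TensorPtr> &user_reg, const Key &ind)
{
  auto base_it = base_reg.find(ind);
  if (base_it != base_reg.end())
    return base_it->second; // NativeOwnTensor or MigrantTensor
  auto user_it = user_reg.find(ind);
  if (user_it != user_reg.end())
    return user_it->second; // NativeUserTensor
  return nullptr;           // nothing registered for this index
}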
diff --git a/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.cc b/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.cc
deleted file mode 100644
index 8377c7183..000000000
--- a/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.cc
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "IfLayer.h"
-
-#include <backend/ITensor.h>
-#include "exec/ExecutorBase.h"
-#include <misc/polymorphic_downcast.h>
-#include "PermuteLayer.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-namespace kernel
-{
-
-IfLayer::IfLayer(const std::shared_ptr<backend::ITensor> &cond_tensor,
- const std::vector<std::shared_ptr<backend::ITensor>> input_tensors,
- const std::vector<std::shared_ptr<backend::ITensor>> output_tensors,
- const ir::OperandIndexSequence &output_indices, const ir::Graph &graph,
- const exec::DynAllocInfoMap &outputs_dyn_alloc_info,
- const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index,
- exec::ExecutorMap *executor_map)
- : _cond_tensor{cond_tensor}, _input_tensors{input_tensors}, _output_tensors{output_tensors},
- _output_indices{output_indices}, _graph{graph},
- _outputs_dyn_alloc_info{outputs_dyn_alloc_info}, _then_subg_index{then_subg_index},
- _else_subg_index{else_subg_index}, _executor_map{executor_map}
-{
- // At this point, executor_map may not have executors of then subg and else subg
-}
-
-void IfLayer::run()
-{
- // Check condition
- // // If true
- // // // Copy _input_tensors -> then subg's inputs
- // // // Run then subg
- // // // Copy outputs of then subg -> _output_tensors
- // // Else
- // // // Copy _input_tensors -> else subg's inputs if false
- // // // Run else subg
- // // // Copy outputs of else subg -> _output_tensors
- auto getResultCond = [](backend::ITensor *tensor) -> bool {
- bool ret = false;
- tensor->access([&](ITensor &tensor) { ret = *reinterpret_cast<bool *>(tensor.buffer()); });
- return ret;
- };
-
- exec::ExecutorBase *subg_exec = nullptr;
- if (getResultCond(_cond_tensor.get()))
- {
- subg_exec = nnfw::misc::polymorphic_downcast<exec::ExecutorBase *>(
- _executor_map->at(_then_subg_index).get());
- }
- else
- {
- subg_exec = nnfw::misc::polymorphic_downcast<exec::ExecutorBase *>(
- _executor_map->at(_else_subg_index).get());
- }
-
- const auto &subg_graph = subg_exec->graph();
-
- std::vector<std::shared_ptr<backend::ITensor>> src_tensors;
- std::vector<std::shared_ptr<backend::ITensor>> dst_tensors;
- // Add tensors used in subgraph or contained in outputs of subgraph
- assert(subg_graph.getInputs().size() == _input_tensors.size());
- assert(subg_graph.getInputs().size() == subg_exec->getInputTensors().size());
- for (uint32_t i = 0; i < subg_graph.getInputs().size(); ++i)
- {
- const auto &subg_input_index = subg_graph.getInputs().at(i);
- const auto &subg_input = subg_graph.operands().at(subg_input_index);
- if (subg_input.getUses().size() > 0 || subg_graph.getOutputs().contains(subg_input_index))
- {
- src_tensors.emplace_back(_input_tensors.at(i));
- dst_tensors.emplace_back(subg_exec->getInputTensors().at(i));
- }
- }
- const auto &subg_inputs_dyn_alloc_info = subg_exec->getInputsDynamicAllocInfo();
- const auto permute_op_input_to_subg_input =
- std::make_shared<PermuteLayer>(src_tensors, dst_tensors, subg_inputs_dyn_alloc_info);
-
- // Add tensors used as output of operation or contained in outputs of operation
- src_tensors.clear();
- dst_tensors.clear();
- assert(_output_indices.size() == subg_exec->getOutputTensors().size());
- assert(_output_indices.size() == _output_tensors.size());
- for (uint32_t i = 0; i < _output_indices.size(); ++i)
- {
- const auto &output_index = _output_indices.at(i);
- const auto &output = _graph.operands().at(output_index);
- if (output.getUses().size() > 0 || _graph.getOutputs().contains(output_index))
- {
- src_tensors.emplace_back(subg_exec->getOutputTensors().at(i));
- dst_tensors.emplace_back(_output_tensors.at(i));
- }
- }
- const auto permute_subg_output_to_op_output =
- std::make_shared<PermuteLayer>(src_tensors, dst_tensors, _outputs_dyn_alloc_info);
-
- // Remove copying of unused tensor
- permute_op_input_to_subg_input->prepare();
- permute_subg_output_to_op_output->prepare();
-
- // Copy & run
- subg_exec->execute(_input_tensors, permute_op_input_to_subg_input);
- permute_subg_output_to_op_output->run();
-}
-
-} // namespace kernel
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
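The heart of IfLayer::run() above is reading a scalar boolean out of the condition tensor under access() and then running either the then- or the else-executor; the rest is wiring up the two PermuteLayers that copy operands across backends. A stripped-down sketch of that dispatch, with hypothetical stand-ins for backend::ITensor and exec::IExecutor:

#include <cstring>
#include <functional>

// Hypothetical, simplified stand-ins; only what the dispatch needs is modeled.
struct Tensor
{
  void access(const std::function<void(Tensor &)> &fn) { fn(*this); }
  const void *buffer() const { return &_value; }
  bool _value = false;
};
struct Executor
{
  void execute() { /* run the selected subgraph */ }
};

// Read the condition once under access(), then run the chosen branch.
void runIf(Tensor &cond, Executor &then_exec, Executor &else_exec)
{
  bool cond_value = false;
  cond.access([&](Tensor &t) { std::memcpy(&cond_value, t.buffer(), sizeof(bool)); });
  (cond_value ? then_exec : else_exec).execute();
}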
diff --git a/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc b/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc
deleted file mode 100644
index e8f1ea679..000000000
--- a/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "PermuteLayer.h"
-
-#include "exec/ShapeConverter.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-namespace kernel
-{
-
-void PermuteLayer::run()
-{
- assert(_src_tensors.size() == _dst_tensors.size());
- // PermuteLayer infers dynamic shape inside itself whenever run is called for the following
- // reasons:
- // 1. PermuteLayer has to access the dynamic tensor manager for input/output tensors of other
- // backends
- // 2. Other controlflow operations (If/While) use this layer for copying tensors of other
- // subgraphs (with other backends)
- // 3. This inferring code is placed here to avoid duplicated code caused by the above 2 reasons
-
- // check if output is not dynamic
- for (size_t i = 0; i < _src_tensors.size(); ++i)
- {
- auto dst_tensor = _dst_tensors.at(i);
- auto src_tensor = _src_tensors.at(i);
- if (src_tensor->is_dynamic() || dst_tensor->is_dynamic())
- {
- // getting output shape
- auto src_shape = src_tensor->getShape();
-
- // set output shape and output buffer
- ir::Shape new_shape =
- exec::convertShape(src_shape, src_tensor->layout(), dst_tensor->layout());
-
- try
- {
- const auto dst_index = _dst_dyn_alloc_info_map.at(dst_tensor).ind;
- auto dyn_tensor_manager = dst_tensor->dynamic_tensor_manager();
- if (!dyn_tensor_manager)
- throw std::runtime_error{
- "Error: PermuteLayer: output's TensorManager does not support dynamic tensor"};
- dyn_tensor_manager->applyShape(dst_index, new_shape);
- assert(dst_tensor->buffer() != nullptr);
- }
- catch (const std::out_of_range &e)
- {
- std::cerr << "Error: out_of_range in PermuteLayer: output's TensorManager does not support "
- "dynamic tensor"
- << '\n';
- throw;
- }
- }
- assert(exec::convertShape(src_tensor->getShape(), src_tensor->layout(), dst_tensor->layout()) ==
- dst_tensor->getShape());
- }
- IPermuteFunction::run();
-}
-
-} // namespace kernel
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.h b/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.h
deleted file mode 100644
index 403ac770d..000000000
--- a/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_PERMUTELAYER_H__
-#define __ONERT_BACKEND_CONTROLFLOW_KERNEL_PERMUTELAYER_H__
-
-#include "backend/ITensorBuilder.h"
-#include "exec/IPermuteFunction.h"
-#include "exec/IExecutor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-namespace kernel
-{
-
-class PermuteLayer : public onert::exec::IPermuteFunction
-{
-public:
- PermuteLayer(const std::vector<std::shared_ptr<ITensor>> &src_tensors,
- const std::vector<std::shared_ptr<ITensor>> &dst_tensors,
- const exec::DynAllocInfoMap &dst_dyn_alloc_info_map)
- : _dst_dyn_alloc_info_map{dst_dyn_alloc_info_map}
- {
- assert(src_tensors.size() == dst_tensors.size());
- _src_tensors = src_tensors;
- _dst_tensors = dst_tensors;
- }
-
- void optimize() override
- {
- // Remove copying of tensor as nullptr
- auto src_it = _src_tensors.begin();
- auto dst_it = _dst_tensors.begin();
- while (src_it != _src_tensors.end())
- {
- if ((*src_it == *dst_it) || (*src_it == nullptr || *dst_it == nullptr))
- {
- src_it = _src_tensors.erase(src_it);
- dst_it = _dst_tensors.erase(dst_it);
- }
- else
- {
- ++src_it;
- ++dst_it;
- }
- }
- }
-
- void run() override;
-
-private:
- const exec::DynAllocInfoMap _dst_dyn_alloc_info_map;
-};
-
-} // namespace kernel
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CONTROLFLOW_KERNEL_PERMUTELAYER_H__
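optimize() above erases entries from the source and destination lists in lockstep so that run() only sees pairs that actually need copying. The same paired-erase idiom in isolation, with TensorPtr standing in for std::shared_ptr<ITensor>:

#include <cassert>
#include <vector>

// Drop pairs that would be a no-op copy: same tensor on both sides, or either
// side missing. Both vectors must stay aligned, so they are erased together.
template <typename TensorPtr>
void dropNoopCopies(std::vector<TensorPtr> &src, std::vector<TensorPtr> &dst)
{
  assert(src.size() == dst.size());
  auto s = src.begin();
  auto d = dst.begin();
  while (s != src.end())
  {
    if (*s == *d || *s == nullptr || *d == nullptr)
    {
      s = src.erase(s); // erase() returns the next valid iterator
      d = dst.erase(d);
    }
    else
    {
      ++s;
      ++d;
    }
  }
}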
diff --git a/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.cc b/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.cc
deleted file mode 100644
index 50936e5f6..000000000
--- a/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.cc
+++ /dev/null
@@ -1,216 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "WhileLayer.h"
-
-#include <backend/ITensor.h>
-#include "exec/ExecutorBase.h"
-#include <misc/polymorphic_downcast.h>
-#include "PermuteLayer.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace controlflow
-{
-namespace kernel
-{
-
-WhileLayer::WhileLayer(const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
- const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const ir::OperandIndexSequence &output_indices, const ir::Graph &graph,
- const exec::DynAllocInfoMap &outputs_dyn_alloc_info,
- const ir::SubgraphIndex &cond_subg_index,
- const ir::SubgraphIndex &body_subg_index, exec::ExecutorMap *executor_map)
- : _cond_subg_index{cond_subg_index}, _body_subg_index{body_subg_index},
- _output_indices{output_indices}, _graph{graph}, _input_tensors{input_tensors},
- _output_tensors{output_tensors}, _outputs_dyn_alloc_info{outputs_dyn_alloc_info},
- _executor_map{executor_map}
-{
- // At this point, executor_map may not have executors of cond subg and body subg
-}
-
-void WhileLayer::run()
-{
- // Copy "_input_tensors" -> "cond subg inputs"
- // Run cond subg
- // Start loop while output of cond subg is true
- // // Copy "_input_tensors" -> "body subg inputs" in the first iteration, then copy "body subg
- // outputs" -> "body subg inputs" in the second and later iterations
- // // Run body subg
- // // Copy "body subg outputs" -> "cond subg inputs"
- // // Run cond subg
- // If there is no loop, copy "_input_tensors" -> "_dst_tensors"; otherwise copy "cond subg inputs"
- // -> "_dst_tensors"
- auto cond_exec = nnfw::misc::polymorphic_downcast<exec::ExecutorBase *>(
- _executor_map->at(_cond_subg_index).get());
- auto body_exec = nnfw::misc::polymorphic_downcast<exec::ExecutorBase *>(
- _executor_map->at(_body_subg_index).get());
-
- const auto &cond_graph = cond_exec->graph();
- const auto &cond_inputs_dyn_alloc = cond_exec->getInputsDynamicAllocInfo();
- const auto &body_graph = body_exec->graph();
- const auto &body_inputs_dyn_alloc = body_exec->getInputsDynamicAllocInfo();
-
- std::vector<std::shared_ptr<backend::ITensor>> input_tensors;
- std::vector<std::shared_ptr<backend::ITensor>> cond_input_tensors;
- std::vector<std::shared_ptr<backend::ITensor>> body_input_tensors;
- std::vector<std::shared_ptr<backend::ITensor>> body_output_tensors;
- std::vector<std::shared_ptr<backend::ITensor>> output_tensors;
-
- // Add only used tensors in cond subgraph
- assert(cond_graph.getInputs().size() == _input_tensors.size());
- assert(cond_graph.getInputs().size() == cond_exec->getInputTensors().size());
- for (uint32_t i = 0; i < cond_graph.getInputs().size(); ++i)
- {
- const auto &cond_input = cond_graph.operands().at(cond_graph.getInputs().at(i));
- if (cond_input.getUses().size() > 0)
- {
- input_tensors.emplace_back(_input_tensors.at(i));
- cond_input_tensors.emplace_back(cond_exec->getInputTensors().at(i));
- }
- }
- const auto permute_op_input_to_cond_input =
- std::make_shared<PermuteLayer>(input_tensors, cond_input_tensors, cond_inputs_dyn_alloc);
-
- // Add only used tensors among outputs of while operation
- assert(_output_indices.size() == _input_tensors.size());
- assert(_output_indices.size() == _output_tensors.size());
- input_tensors.clear();
- output_tensors.clear();
- for (size_t i = 0; i < _output_indices.size(); ++i)
- {
- const auto &output_index = _output_indices.at(i);
- const auto &output = _graph.operands().at(output_index);
- if (output.getUses().size() > 0 || _graph.getOutputs().contains(output_index))
- {
- input_tensors.emplace_back(_input_tensors.at(i));
- output_tensors.emplace_back(_output_tensors.at(i));
- }
- }
- const auto permute_op_input_to_op_output =
- std::make_shared<PermuteLayer>(input_tensors, output_tensors, _outputs_dyn_alloc_info);
-
- // Add all tensors, including unused ones, in the body subgraph because unused input tensors will
- // be copied to output tensors in the body subgraph
- assert(_input_tensors.size() == body_exec->getInputTensors().size());
- input_tensors = _input_tensors;
- body_input_tensors = body_exec->getInputTensors();
- const auto permute_op_input_to_body_input =
- std::make_shared<PermuteLayer>(input_tensors, body_input_tensors, body_inputs_dyn_alloc);
-
- // Add only used tensors in cond subgraph
- assert(cond_graph.getInputs().size() == body_exec->getOutputTensors().size());
- assert(cond_graph.getInputs().size() == cond_exec->getInputTensors().size());
- body_output_tensors.clear();
- cond_input_tensors.clear();
- for (uint32_t i = 0; i < cond_graph.getInputs().size(); ++i)
- {
- const auto &cond_input = cond_graph.operands().at(cond_graph.getInputs().at(i));
- if (cond_input.getUses().size() > 0)
- {
- body_output_tensors.emplace_back(body_exec->getOutputTensors().at(i));
- cond_input_tensors.emplace_back(cond_exec->getInputTensors().at(i));
- }
- }
- const auto permute_body_output_to_cond_input = std::make_shared<PermuteLayer>(
- body_output_tensors, cond_input_tensors, cond_inputs_dyn_alloc);
-
- // Add only used tensors in body subgraph
- assert(body_graph.getInputs().size() == body_exec->getOutputTensors().size());
- assert(body_graph.getInputs().size() == body_exec->getInputTensors().size());
- body_output_tensors.clear();
- body_input_tensors.clear();
- for (uint32_t i = 0; i < body_graph.getInputs().size(); ++i)
- {
- const auto &body_input_index = body_graph.getInputs().at(i);
- const auto &body_input = body_graph.operands().at(body_input_index);
- if (body_input.getUses().size() > 0 &&
- !body_exec->graph().getOutputs().contains(body_input_index))
- {
- body_output_tensors.emplace_back(body_exec->getOutputTensors().at(i));
- body_input_tensors.emplace_back(body_exec->getInputTensors().at(i));
- }
- }
- const auto permute_body_output_to_body_input = std::make_shared<PermuteLayer>(
- body_output_tensors, body_input_tensors, body_inputs_dyn_alloc);
-
- // Add only used tensors among outputs of while operation
- assert(_output_indices.size() == body_exec->getOutputTensors().size());
- assert(_output_indices.size() == _output_tensors.size());
- body_output_tensors.clear();
- output_tensors.clear();
- for (size_t i = 0; i < _output_indices.size(); ++i)
- {
- const auto &output_index = _output_indices.at(i);
- const auto &output = _graph.operands().at(output_index);
- if (output.getUses().size() > 0 || _graph.getOutputs().contains(output_index))
- {
- body_output_tensors.emplace_back(body_exec->getOutputTensors().at(i));
- output_tensors.emplace_back(_output_tensors.at(i));
- }
- }
- const auto permute_body_output_to_op_output =
- std::make_shared<PermuteLayer>(body_output_tensors, output_tensors, _outputs_dyn_alloc_info);
-
- // Remove copying of unused tensor
- permute_op_input_to_cond_input->prepare();
- permute_op_input_to_op_output->prepare();
- permute_op_input_to_body_input->prepare();
- permute_body_output_to_cond_input->prepare();
- permute_body_output_to_body_input->prepare();
- permute_body_output_to_op_output->prepare();
-
- cond_exec->execute(_input_tensors, permute_op_input_to_cond_input);
-
- assert(cond_exec->getOutputTensors().size() == 1);
- auto &cond_output_tensor = cond_exec->getOutputTensors().at(0);
- auto getResultCond = [](backend::ITensor *tensor) -> bool {
- bool ret = false;
- tensor->access([&](ITensor &tensor) { ret = *reinterpret_cast<bool *>(tensor.buffer()); });
- return ret;
- };
-
- const auto body_execute_with_op_inputs = [&]() {
- body_exec->execute(_input_tensors, permute_op_input_to_body_input);
- };
-
- const auto body_execute_with_body_outputs = [&]() {
- body_exec->execute(body_exec->getOutputTensors(), permute_body_output_to_body_input);
- };
-
- std::function<void()> body_execute = body_execute_with_op_inputs;
- const auto cond_execute = [&]() {
- cond_exec->execute(body_exec->getOutputTensors(), permute_body_output_to_cond_input);
- };
- auto permute_to_outputs_fn = permute_op_input_to_op_output;
-
- // Loop while Cond subgraph's output is true
- while (getResultCond(cond_output_tensor.get()))
- {
- body_execute();
- cond_execute();
- body_execute = body_execute_with_body_outputs;
- permute_to_outputs_fn = permute_body_output_to_op_output;
- }
- permute_to_outputs_fn->run();
-}
-
-} // namespace kernel
-} // namespace controlflow
-} // namespace backend
-} // namespace onert
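Stripped of the tensor plumbing, WhileLayer::run() above follows a small control skeleton: run the cond subgraph, and while it reports true run the body (fed with the operation inputs on the first iteration and with its own outputs afterwards) and re-run cond; the final copy into the operation outputs uses the body outputs if the loop ran at least once, otherwise the operation inputs. A sketch of just that skeleton, with std::function placeholders instead of the executors and prepared PermuteLayers:

#include <functional>

// Control skeleton only; each callable stands in for an executor run or a
// prepared PermuteLayer above. The real code also switches the cond subgraph's
// inputs from the op inputs to the body outputs after the first cond run.
void runWhileSkeleton(const std::function<bool()> &run_cond,
                      const std::function<void()> &run_body_with_op_inputs,
                      const std::function<void()> &run_body_with_body_outputs,
                      const std::function<void()> &copy_op_inputs_to_outputs,
                      const std::function<void()> &copy_body_outputs_to_outputs)
{
  bool body_ran = false;
  while (run_cond())
  {
    if (!body_ran)
      run_body_with_op_inputs();    // first iteration consumes the op's inputs
    else
      run_body_with_body_outputs(); // later iterations consume the body's outputs
    body_ran = true;
  }
  if (body_ran)
    copy_body_outputs_to_outputs();
  else
    copy_op_inputs_to_outputs();
}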
diff --git a/runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc b/runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc
deleted file mode 100644
index f7ce3d011..000000000
--- a/runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "backend/cpu_common/DynamicTensorManager.h"
-
-#include "util/logging.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu_common
-{
-
-DynamicTensorManager::DynamicTensorManager(const std::shared_ptr<TensorRegistry> &reg)
- : _dynamic_mem_mgr{new DynamicMemoryManager()}, _tensors{reg}
-{
- // DO NOTHING
-}
-
-void DynamicTensorManager::applyShape(const ir::OperandIndex &ind, const ir::Shape &new_shape)
-{
- VERBOSE_F() << ind << std::endl;
-
- auto tensor = _tensors->getNativeTensor(ind);
- assert(tensor);
-
- bool previously_dynamic = tensor->is_dynamic();
-
- auto allocTensorMem = [&](bool overwrite = false) {
- auto capacity = tensor->total_size();
- auto alloc = _dynamic_mem_mgr->allocate(ind, capacity);
-
- if (overwrite)
- tensor->overwriteBuffer(alloc);
- else
- tensor->setBuffer(alloc);
- };
-
- if (!previously_dynamic)
- {
- // TODO deallocate tensor->buffer()
- // issue is that staticTensorManager might have allocated this memory
- tensor->setShape(new_shape);
- tensor->set_dynamic();
- allocTensorMem(true);
- }
- else if (tensor->buffer() == nullptr)
- {
- tensor->setShape(new_shape);
- tensor->set_dynamic();
- allocTensorMem();
- }
- // when buffer was already allocated and new_shape requires different size
- else
- {
- auto previous_size = tensor->total_size();
- auto new_size = new_shape.num_elements() * sizeOfDataType(tensor->data_type());
- if (previous_size != new_size)
- {
- _dynamic_mem_mgr->deallocate(ind);
-
- tensor->setShape(new_shape);
- tensor->set_dynamic();
- allocTensorMem(true);
- }
- else
- { // when buffer with same size was already allocated, shape could differ
- tensor->setShape(new_shape);
- }
- }
-}
-
-void DynamicTensorManager::buildTensor(const ir::OperandIndex &ind,
- const ir::OperandInfo &tensor_info,
- ir::Layout backend_layout)
-{
- assert(_tensors->getNativeTensor(ind) == nullptr);
- auto tensor = std::make_shared<Tensor>(tensor_info, backend_layout, this);
- _tensors->setNativeTensor(ind, tensor);
-}
-
-void DynamicTensorManager::planDealloc(ir::OperationIndex op_ind, ir::OperandIndex operand_ind)
-{
- _dealloc_tensor_map[op_ind].emplace(operand_ind);
-}
-
-void DynamicTensorManager::deallocInput(ir::OperationIndex op_ind)
-{
- auto find = _dealloc_tensor_map.find(op_ind);
- if (find == _dealloc_tensor_map.end())
- return;
-
- auto &input_set = find->second;
- for (auto input_ind : input_set)
- {
- auto *tensor = _tensors->getNativeTensor(input_ind).get();
- if (!tensor->is_dynamic())
- continue;
-
- _dynamic_mem_mgr->deallocate(input_ind);
- tensor->resetBuffer();
-
- VERBOSE(DynamicTensorManager) << "Deallocating #" << input_ind.value()
- << " (input of op_ind: " << op_ind.value() << ")" << std::endl;
- }
-}
-
-void DynamicTensorManager::deallocSubgraphOutput(ir::OperandIndex output_ind)
-{
- auto *tensor = _tensors->getNativeTensor(output_ind).get();
- if (!tensor->is_dynamic())
- return;
-
- _dynamic_mem_mgr->deallocate(output_ind);
- tensor->resetBuffer();
-
- VERBOSE(DynamicTensorManager) << "Deallocating #" << output_ind.value()
- << " (output of a subgraph)" << std::endl;
-}
-
-} // namespace cpu_common
-} // namespace backend
-} // namespace onert
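applyShape() above is a four-way decision on the tensor's current state: a formerly static tensor is switched to dynamic and given a fresh buffer, a dynamic tensor without a buffer gets one allocated, a dynamic tensor whose byte size changes is deallocated and reallocated, and a same-size reshape only updates shape metadata. A sketch of that decision tree, where DynTensor, Alloc and Dealloc are placeholders for the tensor type and the DynamicMemoryManager calls keyed by operand index:

#include <cstddef>

// Decision tree only; setShape() is assumed to be applied by the caller in
// every branch, as the real applyShape() does.
template <typename DynTensor, typename Alloc, typename Dealloc>
void applyShapeSketch(DynTensor &tensor, std::size_t new_byte_size, Alloc alloc, Dealloc dealloc)
{
  if (!tensor.is_dynamic())
  {
    // Static -> dynamic: leave the statically planned buffer alone and
    // overwrite with a freshly allocated one.
    tensor.set_dynamic();
    tensor.overwriteBuffer(alloc(new_byte_size));
  }
  else if (tensor.buffer() == nullptr)
  {
    tensor.setBuffer(alloc(new_byte_size)); // dynamic but never allocated yet
  }
  else if (tensor.total_size() != new_byte_size)
  {
    dealloc();                              // size changed: release, then re-allocate
    tensor.overwriteBuffer(alloc(new_byte_size));
  }
  // else: same byte size, only the shape metadata changes
}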
diff --git a/runtime/onert/core/src/compiler/BackendManager.cc b/runtime/onert/core/src/compiler/BackendManager.cc
index db7a14a96..44442c065 100644
--- a/runtime/onert/core/src/compiler/BackendManager.cc
+++ b/runtime/onert/core/src/compiler/BackendManager.cc
@@ -16,22 +16,17 @@
#include "compiler/BackendManager.h"
-#include <memory>
-#include <dlfcn.h>
+#include "../backend/builtin/Backend.h"
+#include "../backend/builtin/Config.h"
-#include "backend/Backend.h"
-#include "backend/controlflow/Backend.h"
-#include "backend/controlflow/Config.h"
-#include "backend/IConfig.h"
-#include "util/logging.h"
-#include "util/ConfigSource.h"
-#include "misc/string_helpers.h"
+#include <dlfcn.h>
+#include <memory>
static const char *SHARED_LIB_EXT =
#if defined(__APPLE__) && defined(__MACH__)
- ".dylib";
+ ".dylib";
#else
- ".so";
+ ".so";
#endif
namespace onert
@@ -45,20 +40,20 @@ BackendManager &BackendManager::get()
return object;
}
-BackendManager::BackendManager() { loadControlflowBackend(); }
+BackendManager::BackendManager() { loadBuiltinBackend(); }
-void BackendManager::loadControlflowBackend()
+void BackendManager::loadBuiltinBackend()
{
- auto backend_object = std::unique_ptr<backend::controlflow::Backend, backend_destroy_t>(
- new backend::controlflow::Backend, [](backend::Backend *backend) { delete backend; });
+ auto backend_object = std::unique_ptr<backend::builtin::Backend, backend_destroy_t>(
+ new backend::builtin::Backend, [](backend::Backend *backend) { delete backend; });
bool initialized = backend_object->config()->initialize(); // Call initialize here?
if (!initialized)
{
- throw std::runtime_error(backend::controlflow::Config::ID + " backend initialization failed");
+ throw std::runtime_error(backend::builtin::Config::ID + " backend initialization failed");
}
- _controlflow = backend_object.get(); // Save the controlflow backend implementation pointer
- assert(_controlflow);
+ _builtin = backend_object.get(); // Save the builtin backend implementation pointer
+ assert(_builtin);
_gen_map.emplace(backend_object->config()->id(), std::move(backend_object));
}
@@ -69,68 +64,67 @@ void BackendManager::loadBackend(const std::string &backend)
return;
}
- // TODO Remove indentation
- // Workaround If backend have dynamic library with "-boost" suffix naming,
- // BackendManager load library with "-boost" suffix instead of library without suffix
- // This feature is used for custom backend extension to support additional operations
- {
- const std::string backend_boost_so = "libbackend_" + backend + "-boost" + SHARED_LIB_EXT;
- const std::string backend_so = "libbackend_" + backend + SHARED_LIB_EXT;
+ const std::string backend_so = "libbackend_" + backend + SHARED_LIB_EXT;
+ void *handle = dlopen(backend_so.c_str(), RTLD_LAZY | RTLD_LOCAL);
- void *handle = dlopen(backend_boost_so.c_str(), RTLD_LAZY | RTLD_LOCAL);
- if (handle == nullptr)
- {
- handle = dlopen(backend_so.c_str(), RTLD_LAZY | RTLD_LOCAL);
+ if (handle == nullptr)
+ {
+ VERBOSE(BackendManager) << "Failed to load backend '" << backend << "' - " << dlerror() << "\n";
+ return;
+ }
- if (handle == nullptr)
- {
- VERBOSE_F() << "Failed to load backend '" << backend << "' - " << dlerror() << std::endl;
- return;
- }
+ VERBOSE(BackendManager) << "Successfully loaded '" << backend << "'(" << backend_so << ")\n";
- VERBOSE_F() << "Successfully loaded '" << backend << "' - " << backend_so << "\n";
+ {
+ // load object creator function
+ auto backend_create = (backend_create_t)dlsym(handle, "onert_backend_create");
+ if (backend_create == nullptr)
+ {
+ // TODO replace `fprintf` with `VERBOSE`
+ fprintf(stderr, "BackendManager: unable to find function `onert_backend_create` : %s\n",
+ dlerror());
+ dlclose(handle);
+ return;
}
- else
+
+ // load object creator function
+ auto backend_destroy = (backend_destroy_t)dlsym(handle, "onert_backend_destroy");
+ if (backend_destroy == nullptr)
{
- VERBOSE_F() << "Successfully loaded '" << backend << "' - " << backend_boost_so << "\n";
+ // TODO replace `fprintf` with `VERBOSE`
+ fprintf(stderr, "BackendManager: unable to find `function onert_backend_destroy` : %s\n",
+ dlerror());
+ dlclose(handle);
+ return;
}
+ auto backend_object =
+ std::unique_ptr<backend::Backend, backend_destroy_t>(backend_create(), backend_destroy);
+ bool initialized = backend_object->config()->initialize(); // Call initialize here?
+ if (!initialized)
{
- // load object creator function
- auto backend_create = (backend_create_t)dlsym(handle, "onert_backend_create");
- if (backend_create == nullptr)
- {
- fprintf(stderr, "BackendManager: unable to open function onert_backend_create : %s\n",
- dlerror());
- abort();
- }
+ VERBOSE(BackendManager) << backend.c_str()
+ << " backend initialization failed. Don't use this backend"
+ << std::endl;
+ dlclose(handle);
+ return;
+ }
+ _gen_map.emplace(backend_object->config()->id(), std::move(backend_object));
+ }
- // load object creator function
- auto backend_destroy = (backend_destroy_t)dlsym(handle, "onert_backend_destroy");
- if (backend_destroy == nullptr)
+ // Save the backend handle (avoids a warning caused by losing the handle without dlclose())
+ auto u_handle = std::unique_ptr<void, dlhandle_destroy_t>{
+ handle, [id = backend, filename = backend_so](void *h) {
+ if (dlclose(h) == 0)
{
- fprintf(stderr, "BackendManager: unable to open function onert_backend_destroy : %s\n",
- dlerror());
- abort();
+ VERBOSE(BackendManager) << "Successfully unloaded '" << id << "'(" << filename << ")\n";
}
-
- auto backend_object =
- std::unique_ptr<backend::Backend, backend_destroy_t>(backend_create(), backend_destroy);
- bool initialized = backend_object->config()->initialize(); // Call initialize here?
- if (!initialized)
+ else
{
- VERBOSE_F() << backend.c_str() << " backend initialization failed. Don't use this backend"
- << std::endl;
- dlclose(handle);
- return;
+ VERBOSE(BackendManager) << "Failed to unload backend '" << id << "'- " << dlerror() << "\n";
}
- _gen_map.emplace(backend_object->config()->id(), std::move(backend_object));
- }
-
- // Save backend handle (avoid warning by handle lost without dlclose())
- auto u_handle = std::unique_ptr<void, dlhandle_destroy_t>{handle, [](void *h) { dlclose(h); }};
- _handle_map.emplace(backend, std::move(u_handle));
- }
+ }};
+ _handle_map.emplace(backend, std::move(u_handle));
}
backend::Backend *BackendManager::get(const std::string &key)
@@ -153,7 +147,7 @@ const backend::Backend *BackendManager::get(const std::string &key) const
return nullptr;
}
-const backend::controlflow::Backend *BackendManager::getControlflow() const { return _controlflow; }
+const backend::Backend *BackendManager::getBuiltin() const { return _builtin; }
} // namespace compiler
} // namespace onert
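The rewritten loadBackend() above is the usual dlopen/dlsym plugin pattern: open libbackend_<name>.so, resolve the C factory symbols onert_backend_create and onert_backend_destroy, wrap the created backend in a unique_ptr whose deleter is the destroy function, and keep the library handle so it can be dlclose()d later. A minimal sketch of that pattern, with Backend as an opaque placeholder and the handle bookkeeping left out:

#include <dlfcn.h>

#include <cstdio>
#include <memory>
#include <string>

struct Backend; // opaque plugin type for this sketch

using create_fn = Backend *(*)();
using destroy_fn = void (*)(Backend *);

// Returns a null pointer (with a null deleter) when loading fails; the real
// BackendManager also stores the dlopen handle so it can unload the library
// after every backend object has been destroyed.
std::unique_ptr<Backend, destroy_fn> loadPlugin(const std::string &path)
{
  void *handle = dlopen(path.c_str(), RTLD_LAZY | RTLD_LOCAL);
  if (handle == nullptr)
  {
    std::fprintf(stderr, "dlopen failed: %s\n", dlerror());
    return std::unique_ptr<Backend, destroy_fn>{nullptr, nullptr};
  }
  auto create = reinterpret_cast<create_fn>(dlsym(handle, "onert_backend_create"));
  auto destroy = reinterpret_cast<destroy_fn>(dlsym(handle, "onert_backend_destroy"));
  if (create == nullptr || destroy == nullptr)
  {
    std::fprintf(stderr, "dlsym failed: %s\n", dlerror());
    dlclose(handle);
    return std::unique_ptr<Backend, destroy_fn>{nullptr, nullptr};
  }
  return std::unique_ptr<Backend, destroy_fn>{create(), destroy};
}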
diff --git a/runtime/onert/core/src/compiler/Compiler.cc b/runtime/onert/core/src/compiler/Compiler.cc
index 93dbbc3b5..ba621bb4f 100644
--- a/runtime/onert/core/src/compiler/Compiler.cc
+++ b/runtime/onert/core/src/compiler/Compiler.cc
@@ -16,284 +16,185 @@
#include "compiler/Compiler.h"
-#include "ParamChecker.h"
+#include "CompilerHelpers.h"
#include "ExecutorFactory.h"
-#include "OperationValidator.h"
-#include "Fp32ToFp16Converter.h"
-
-#include <backend/controlflow/Config.h>
-#include "compiler/BackendManager.h"
-#include "compiler/IScheduler.h"
-#include "compiler/ManualScheduler.h"
-#include "compiler/HEScheduler.h"
-#include "compiler/StaticShapeInference.h"
-#include "exec/ExecTime.h"
-#include "ir/operation/LowerInfo.h"
-#include "dumper/dot/DotDumper.h"
-#include "compiler/Linear.h"
-#include "interp/InterpExecutor.h"
-#include "util/ConfigSource.h"
-#include "util/logging.h"
-#include "ir/OperationDumper.h"
-#include "misc/string_helpers.h"
+#include "ShapeValidator.h"
+#include "pass/ConstantOutputPass.h"
+#include "pass/OddOutputPass.h"
+#include "pass/PassRunner.h"
+#include "pass/UnusedOperandEliminationPass.h"
+#include "../dumper/dot/DotDumper.h"
+#include "../exec/SingleModelExecutors.h"
+#include "../ir/OperationDumper.h"
+#include "../ir/verifier/Verifier.h"
+
+#include "compiler/StaticShapeInferer.h"
+
+#include <misc/string_helpers.h>
+#include <misc/polymorphic_downcast.h>
namespace onert
{
-
namespace compiler
{
-CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs)
+Compiler::Compiler(const std::shared_ptr<ir::Model> &model, CompilerOptions &copt)
+ : _model{model}, _options{&copt}
{
- CompilerOptions options;
- options.backend_list = nnfw::misc::split(util::getConfigString(util::config::BACKENDS), ';');
- options.is_primary_subgraph = false;
- options.trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH);
- options.graph_dump_level = util::getConfigInt(util::config::GRAPH_DOT_DUMP);
- options.op_seq_max_node = util::getConfigInt(util::config::OP_SEQ_MAX_NODE);
- options.executor = util::getConfigString(util::config::EXECUTOR);
- options.he_scheduler = util::getConfigBool(util::config::USE_SCHEDULER);
- options.he_profiling_mode = util::getConfigBool(util::config::PROFILING_MODE);
- options.disable_compile = util::getConfigBool(util::config::DISABLE_COMPILE);
- options.fp16_enable = util::getConfigBool(util::config::FP16_ENABLE);
-#ifdef RUY_PROFILER
- options.op_seq_max_node = 1;
-#endif
-
- {
- // Backend for all
- auto &ms_options = options.manual_scheduler_options;
-
- // Default value for op_backend_all is first element in the backend list
- ms_options.backend_for_all = util::getConfigString(util::config::OP_BACKEND_ALLOPS);
-
-// Opcode to Backend
-#define OP(OpName) \
- { \
- const auto &backend_str = util::getConfigString(util::config::OP_BACKEND_##OpName); \
- if (!backend_str.empty()) \
- { \
- ms_options.opcode_to_backend[ir::OpCode::OpName] = backend_str; \
- } \
- }
-#include "ir/Operations.lst"
-#undef OP
-
- // Index to Backend
- // TODO Support multiple subgraphs for manual scheduling
- auto map_str = util::getConfigString(util::config::OP_BACKEND_MAP);
- auto key_val_list = nnfw::misc::split(map_str, ';');
- for (const auto &key_val_str : key_val_list)
- {
- if (key_val_str.empty())
- {
- continue;
- }
-
- auto key_val = nnfw::misc::split(key_val_str, '=');
- const auto &key_str = key_val.at(0);
- const auto &val = key_val.at(1);
- auto key = static_cast<uint32_t>(std::stoi(key_str));
-
- subgs.at(ir::SubgraphIndex{0})
- ->operations()
- .at(ir::OperationIndex{key}); // Check if it exists, or this will throw
- ms_options.index_to_backend.emplace(ir::OperationIndex{key}, val);
- }
- }
- return options;
+ // DO NOTHING
}
-Compiler::Compiler(const std::shared_ptr<ir::Subgraphs> &subgs)
- : _subgraphs{subgs}, _state{State::CREATED}
+Compiler::Compiler(const std::shared_ptr<ir::NNPkg> &nnpkg,
+ std::vector<std::unique_ptr<CompilerOptions>> &copts)
+ : _model{nnpkg->primary_model()}, _options{copts[0].get()}
{
- // Set default values for CompilerOptions
- // All these default values should not be fetched from Env, when we stop supporting Android NN
- // API.
- _options = fetchCompilerOptionsFromGlobalConfig(*subgs);
+ // Use for single model only
+ assert(nnpkg->model_count() == 1);
}
-void Compiler::enableToFp16() { _options.fp16_enable = true; }
-
-void Compiler::checkProfilerConditions()
+std::shared_ptr<CompilerArtifact> Compiler::compile(void)
{
- if (!_options.he_scheduler)
- throw std::runtime_error("Heterogeneous scheduler must be enabled during profiling.");
-
- if (_options.executor != "Dataflow")
- throw std::runtime_error("Profiling mode works only with 'Dataflow' executor");
-}
+ /***************************************************
+ * Prepare compilation phase
+ ***************************************************/
+ if (!_options)
+ throw std::runtime_error{"Empty compile option"};
-std::shared_ptr<exec::ExecutorMap> Compiler::compile(void)
-{
- // Set control flow backend for control flow operators
+ // Mode check
+ // TODO handle option for each model
+ if (_options->he_profiling_mode)
{
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] =
- backend::controlflow::Config::ID;
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] =
- backend::controlflow::Config::ID;
+ if (!_options->he_scheduler)
+ throw std::runtime_error("Heterogeneous scheduler must be enabled during profiling.");
+
+ if (_options->executor != "Dataflow")
+ throw std::runtime_error("Profiling mode works only with 'Dataflow' executor");
}
- // FIXME This is a workaround for bcq operations, should remove it
+ if (!_options->minmax_filepath.empty())
{
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq";
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq";
+ if (_options->executor != "Linear")
+ throw std::runtime_error("Recording minmax works only with Linear executor");
}
+ if (!_model->hasOnly<ir::Graph>())
{
- VERBOSE(Compiler) << std::boolalpha;
- VERBOSE(Compiler) << "==== Compiler Options ====" << std::endl;
- VERBOSE(Compiler) << "backend_list : "
- << nnfw::misc::join(_options.backend_list.begin(),
- _options.backend_list.end(), "/")
- << std::endl;
- VERBOSE(Compiler) << "trace_filepath : " << _options.trace_filepath << std::endl;
- VERBOSE(Compiler) << "graph_dump_level : " << _options.graph_dump_level << std::endl;
- VERBOSE(Compiler) << "op_seq_max_node : " << _options.op_seq_max_node << std::endl;
- VERBOSE(Compiler) << "executor : " << _options.executor << std::endl;
- VERBOSE(Compiler) << "manual_scheduler_options : (Too many things to print)" << std::endl;
- VERBOSE(Compiler) << "he_scheduler : " << _options.he_scheduler << std::endl;
- VERBOSE(Compiler) << "he_profiling_mode : " << _options.he_profiling_mode << std::endl;
- VERBOSE(Compiler) << "disable_compile : " << _options.disable_compile << std::endl;
- VERBOSE(Compiler) << "fp16_enable : " << _options.fp16_enable << std::endl;
- VERBOSE(Compiler) << std::noboolalpha;
+ throw std::runtime_error("Compiler can only compile models for inference.");
}
- /***************************************************
- * Prepare compilation phase
- ***************************************************/
+ _options->forceInternalOptions();
+ _options->verboseOptions();
- auto executors = std::make_shared<exec::ExecutorMap>();
+ auto custom_kernel_builder = _model->getKernelBuilder();
- // Compilable check
- // TODO: Support hybrid execution -
- // execution between interpreter and compiled executor (including control flow)
- if (!checkCompilable())
- {
- _subgraphs->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
- executors->emplace(index, std::make_unique<interp::InterpExecutor>(subg));
- });
- _state = State::COMPILED;
- return executors;
- }
+ _model->iterate([&](const ir::SubgraphIndex &, ir::IGraph &graph) {
+ auto &subg = nnfw::misc::polymorphic_downcast<ir::Graph &>(graph);
- // Mode check
- if (_options.he_profiling_mode)
- checkProfilerConditions();
+ // Mandatory passes
+ pass::PassRunner{}
+ .append(std::make_unique<pass::ConstantOutputPass>(subg))
+ .append(std::make_unique<pass::OddOutputPass>(subg))
+ .run();
+
+ // Optimizations
+ pass::PassRunner{}.append(std::make_unique<pass::UnusedOperandEliminationPass>(subg)).run();
+ });
/***************************************************
* Backend independent analysis & optimization phase
***************************************************/
- auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_options.graph_dump_level);
+ // TODO Handle dump level for each model
+ auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_options->graph_dump_level);
+ onert::dumper::dot::DotDumper dot_dumper(dump_level);
+
+ // Tracing context
+ auto tracing_ctx = std::make_unique<util::TracingCtx>();
// Lower: Assign backend
std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>> lowered_subgs;
- _subgraphs->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
- _options.is_primary_subgraph = (index == ir::SubgraphIndex{0});
- onert::dumper::dot::DotDumper dot_dumper(subg, dump_level);
- dot_dumper.dump(nnfw::misc::str("before_lower_subg-", index.value()));
-
- // Lower: Assign backend
- lowered_subgs[index] = std::make_unique<compiler::LoweredGraph>(subg, _options);
-
- // Check backend(s) for subgraph support FP16
- bool backends_support_fp16 = true;
- auto &contexts = (*lowered_subgs[index]).backend_contexts();
- for (auto it = contexts.begin(); it != contexts.end(); it++)
- {
- // Controlflow backend is not for actual computation of operations, so it is an exception
- if (it->first->config()->id() != backend::controlflow::Config::ID)
- backends_support_fp16 &= it->first->config()->supportFP16();
- }
-
- if (_options.fp16_enable && backends_support_fp16)
- {
- // NOTE: the only acl_cl backend enables fp16 mode
- Fp32ToFp16Converter(*lowered_subgs[index]).run();
- }
+ {
+ _model->iterate([&](const ir::SubgraphIndex &subg_index, ir::IGraph &graph) {
+ auto &subg = nnfw::misc::polymorphic_downcast<ir::Graph &>(graph);
+
+ // Lower: Assign backend
+ lowered_subgs[subg_index] = std::make_unique<compiler::LoweredGraph>(subg, *_options);
+ // Set tracing_ctx for copied graph
+ if (tracing_ctx != nullptr)
+ tracing_ctx->setSubgraphIndex(&(lowered_subgs[subg_index]->graph()), subg_index.value());
+ });
+ }
- subg.setSubgraphs(nullptr);
- });
+ _model.reset();
- _subgraphs.reset();
+ for (const auto &pair : lowered_subgs)
+ {
+ const auto &subg_index = pair.first;
+ const auto &lowered_subg = pair.second;
+ dot_dumper.dump(*lowered_subg, nnfw::misc::str("after_lower_subg-", subg_index.value()));
+ }
// Shape inference.
{
+ // Run the StaticShapeInferer of the primary subg. All child StaticShapeInferers are called
+ // recursively
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers =
+ createStaticShapeInferers(lowered_subgs);
+
const auto primary_subg_idx = ir::SubgraphIndex{0};
- StaticShapeInferer inferer(primary_subg_idx, lowered_subgs);
- lowered_subgs.at(primary_subg_idx)
- ->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- auto has_dynamic_tensor = inferer.infer(op_seq);
- op_seq.has_dynamic_tensor(has_dynamic_tensor);
- });
- inferer.dump();
- }
+ inferers.at(primary_subg_idx)->infer();
- /*************************************************************
- * Backend independent analysis & optimization phase finished
- *************************************************************/
+ for (const auto &pair_inferer : inferers)
+ {
+ const auto inferer = pair_inferer.second.get();
+ inferer->dump();
+ }
+ }
- // operation validation
- for (auto &pair : lowered_subgs)
+ // Shape validation
+ // TODO Move shape independent feature check from ShapeValidator to OperationValidator
+ // TODO Move ShapeValidator into shape inference
+ // - Check input tensor shape validation
+ // - Check parameter value validation, whose valid values depend on the input tensor shape
+ // - Output tensor shape validation is not needed because the
+ // static/dynamic shape inferer will produce a valid output shape
+ for (const auto &pair : lowered_subgs)
{
auto &lowered_subg = pair.second;
- compiler::OperationValidator{lowered_subg->graph()}();
+ compiler::ShapeValidator{lowered_subg->graph()}();
}
- executors = std::make_shared<exec::ExecutorMap>();
- for (auto &pair : lowered_subgs)
+ /*************************************************************
+ * Backend independent analysis & optimization phase finished
+ *************************************************************/
+ auto executors = std::make_shared<exec::SingleModelExecutors>();
+ for (auto &&pair : lowered_subgs)
{
- const auto &subg_index = pair.first;
+ auto const model_index = ir::ModelIndex{0};
+ auto const subg_index = pair.first;
auto &lowered_subg = pair.second;
- auto indexed_ranks = lowered_subg->indexed_ranks();
-
- _options.is_primary_subgraph = (subg_index == ir::SubgraphIndex{0});
-
- onert::dumper::dot::DotDumper dot_dumper_lowered(lowered_subg.get(), dump_level);
- dot_dumper_lowered.dump("after_lower_subg-" + std::to_string(subg_index.value()));
+ auto const indexed_ranks = lowered_subg->indexed_ranks();
- ir::OperationDumper dumper("START SUBGRAPH " + std::to_string(subg_index.value()));
+ ir::OperationDumper dumper("Executor generation of Subgraph " +
+ std::to_string(subg_index.value()));
lowered_subg->graph().operations().iterate(
- [&](const ir::OperationIndex &, const ir::Operation &op) { op.accept(dumper); });
+ [&](const ir::OperationIndex &, const ir::IOperation &op) { op.accept(dumper); });
+
+ ExecutorFactoryArgs args;
+ args.tracing_ctx = tracing_ctx.get();
+ args.options = _options;
+ args.model_index = model_index;
+ args.custom_kernel_builder = custom_kernel_builder;
auto executor = std::unique_ptr<exec::IExecutor>{
- ExecutorFactory::get().create(std::move(lowered_subg), _options, executors)};
+ ExecutorFactory::get().create(std::move(lowered_subg), executors, args)};
executor->setIndexedRanks(indexed_ranks);
- executors->insert(std::make_pair(subg_index, std::move(executor)));
+ executors->emplace(model_index, subg_index, std::move(executor));
}
/********************************
* Code generation phase finished
********************************/
- _state = State::COMPILED;
- return executors;
-}
-
-bool Compiler::checkCompilable()
-{
- // Disable compile phase
- // When ready to use interpreter backend, remove this config and use backend setting
- if (_options.disable_compile)
- {
- return false;
- }
-
- // TODO check unspecified operand shape
-
- // Check compilable parameter
- for (uint32_t i = 0; i < _subgraphs->count(); ++i)
- {
- auto graph = _subgraphs->at(ir::SubgraphIndex{i});
- ParamChecker paramChecker{graph};
- paramChecker();
- if (paramChecker.haveNoneConstParam())
- {
- return false;
- }
- }
-
- return true;
+ return std::make_shared<CompilerArtifact>(executors, std::move(tracing_ctx));
}
} // namespace compiler
-
} // namespace onert
diff --git a/runtime/onert/core/src/compiler/CompilerFactory.cc b/runtime/onert/core/src/compiler/CompilerFactory.cc
new file mode 100644
index 000000000..aeb0876c4
--- /dev/null
+++ b/runtime/onert/core/src/compiler/CompilerFactory.cc
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "compiler/CompilerFactory.h"
+
+#include "MultiModelCompiler.h"
+#ifdef ONERT_TRAIN
+#include "train/TrainingCompiler.h"
+#endif // ONERT_TRAIN
+
+#include "compiler/Compiler.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+CompilerFactory &CompilerFactory::get()
+{
+ static CompilerFactory singleton;
+ return singleton;
+}
+
+std::unique_ptr<ICompiler>
+CompilerFactory::create(const std::shared_ptr<ir::NNPkg> &nnpkg,
+ std::vector<std::unique_ptr<CompilerOptions>> &copts,
+ const compiler::train::TrainingInfo *training_info)
+{
+#ifdef ONERT_TRAIN
+ // Returning compiler for training
+ if (training_info)
+ return std::make_unique<train::TrainingCompiler>(nnpkg, copts, *training_info);
+#else // ONERT_TRAIN
+ (void)training_info;
+#endif // ONERT_TRAIN
+
+ // Returning compiler for inference
+ if (nnpkg->model_count() == 1)
+ return std::make_unique<Compiler>(nnpkg, copts);
+
+ return std::make_unique<MultiModelCompiler>(nnpkg, copts);
+}
+
+} // namespace compiler
+} // namespace onert
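A possible call site for the factory above, assuming ICompiler exposes a compile() returning a CompilerArtifact the way Compiler::compile() does earlier in this diff; the includes and names are indicative only:

#include "compiler/Compiler.h"
#include "compiler/CompilerFactory.h"

#include <memory>
#include <vector>

// Passing a null training-info pointer selects the inference path: a single
// Compiler when the package holds one model, MultiModelCompiler otherwise.
std::shared_ptr<onert::compiler::CompilerArtifact>
compileForInference(const std::shared_ptr<onert::ir::NNPkg> &nnpkg,
                    std::vector<std::unique_ptr<onert::compiler::CompilerOptions>> &copts)
{
  auto compiler = onert::compiler::CompilerFactory::get().create(nnpkg, copts, nullptr);
  return compiler->compile();
}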
diff --git a/runtime/onert/core/src/compiler/CompilerHelpers.h b/runtime/onert/core/src/compiler/CompilerHelpers.h
new file mode 100644
index 000000000..798334b3b
--- /dev/null
+++ b/runtime/onert/core/src/compiler/CompilerHelpers.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_COMPILER_HELPERS_H__
+#define __ONERT_COMPILER_COMPILER_HELPERS_H__
+
+#include <compiler/ILoweredGraph.h>
+#include <compiler/StaticShapeInferer.h>
+#include <ir/Index.h>
+
+#include <memory>
+#include <unordered_map>
+
+namespace onert
+{
+namespace compiler
+{
+
+/**
+ * @brief Create a shape inferer map for a lowered model
+ * @param[in] lowered_subgs lowered model map
+ * @return Shape inferer map
+ */
+template <typename LoweredGraphType,
+ typename = std::enable_if_t<std::is_base_of<ILoweredGraph, LoweredGraphType>::value>>
+static std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>>
+createStaticShapeInferers(
+ const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<LoweredGraphType>> &lowered_subgs)
+{
+ std::unordered_map<ir::SubgraphIndex, ILoweredGraph *> lsubgs;
+ for (auto &&e : lowered_subgs)
+ lsubgs[e.first] = e.second.get();
+ return StaticShapeInferer::createStaticShapeInferers(lsubgs);
+}
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_COMPILER_HELPERS_H__
diff --git a/runtime/onert/core/src/compiler/CompilerOptions.cc b/runtime/onert/core/src/compiler/CompilerOptions.cc
new file mode 100644
index 000000000..830d9dd00
--- /dev/null
+++ b/runtime/onert/core/src/compiler/CompilerOptions.cc
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "compiler/CompilerOptions.h"
+
+#include "../backend/builtin/Backend.h"
+
+#include "util/ConfigSource.h"
+#include "util/logging.h"
+
+#include <misc/string_helpers.h>
+
+namespace
+{
+
+using namespace onert;
+
+std::string getOpBackends(std::unordered_map<ir::OpCode, std::string> &opcode_to_backend)
+{
+ std::unordered_map<ir::OpCode, std::string>::iterator it;
+ std::string opbackends;
+
+ for (it = opcode_to_backend.begin(); it != opcode_to_backend.end(); ++it)
+ {
+ if (!opbackends.empty())
+ opbackends = opbackends + ", ";
+
+ auto opcode = it->first;
+ const std::string opname = ir::toString(opcode);
+ opbackends += opname + "=" + it->second;
+ }
+ return opbackends;
+}
+
+} // namespace
+
+namespace onert
+{
+namespace compiler
+{
+
+void ManualSchedulerOptions::setBackendMap(const std::string &str)
+{
+ // TODO Support multiple subgraphs for manual scheduling
+ auto key_val_list = nnfw::misc::split(str, ';');
+ for (const auto &key_val_str : key_val_list)
+ {
+ if (key_val_str.empty())
+ {
+ continue;
+ }
+
+ auto key_val = nnfw::misc::split(key_val_str, '=');
+ const auto &key_str = key_val.at(0);
+ const auto &val = key_val.at(1);
+ auto key = static_cast<uint32_t>(std::stoi(key_str));
+ this->index_to_backend.emplace(ir::OperationIndex{key}, val);
+ }
+}
+
+std::unique_ptr<CompilerOptions> CompilerOptions::fromGlobalConfig()
+{
+ auto o = std::make_unique<CompilerOptions>();
+ o->backend_list = nnfw::misc::split(util::getConfigString(util::config::BACKENDS), ';');
+ o->minmax_filepath = util::getConfigString(util::config::MINMAX_FILEPATH);
+ o->trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH);
+ o->graph_dump_level = util::getConfigInt(util::config::GRAPH_DOT_DUMP);
+ o->executor = util::getConfigString(util::config::EXECUTOR);
+ o->he_scheduler = util::getConfigBool(util::config::USE_SCHEDULER);
+ o->he_profiling_mode = util::getConfigBool(util::config::PROFILING_MODE);
+ o->fp16_enable = util::getConfigBool(util::config::FP16_ENABLE);
+ {
+ // Backend for all
+ auto &ms_options = o->manual_scheduler_options;
+
+ // Default value for op_backend_all is first element in the backend list
+ ms_options.backend_for_all = util::getConfigString(util::config::OP_BACKEND_ALLOPS);
+
+// Opcode to Backend
+#define OP(OpName) \
+ { \
+ const auto &backend_str = util::getConfigString(util::config::OP_BACKEND_##OpName); \
+ if (!backend_str.empty()) \
+ { \
+ ms_options.opcode_to_backend[ir::OpCode::OpName] = backend_str; \
+ } \
+ }
+#include "ir/Operations.lst"
+#undef OP
+
+ // Index to Backend
+ auto map_str = util::getConfigString(util::config::OP_BACKEND_MAP);
+ ms_options.setBackendMap(map_str);
+ }
+ return o;
+}
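+// Usage sketch (illustrative; the actual call sites live elsewhere in the compiler):
+//   auto copts = CompilerOptions::fromGlobalConfig(); // read BACKENDS, EXECUTOR, ... from config
+//   copts->forceInternalOptions();                    // pin builtin/bcq/trix opcode mappings
+//   copts->verboseOptions();                          // dump the effective options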
+
+void CompilerOptions::forceInternalOptions()
+{
+ // Set control flow backend for control flow operators
+ auto &builtin_id = backend::builtin::Config::ID;
+ manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = builtin_id;
+ manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = builtin_id;
+ manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = builtin_id;
+
+ // FIXME This is a workaround for bcq operations, should remove it
+ manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq";
+ manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq";
+
+ // FIXME This is a workaround for bulk operations, should remove it
+ manual_scheduler_options.opcode_to_backend[ir::OpCode::Bulk] = "trix";
+}
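+// Note: the assignments above unconditionally overwrite any user-provided mapping for these
+// opcodes, which is presumably why this step is kept separate from fromGlobalConfig().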
+
+void CompilerOptions::verboseOptions()
+{
+ VERBOSE(Compiler) << std::boolalpha << "==== Compiler Options ====" << std::endl;
+ VERBOSE(Compiler) << "backend_list : "
+ << nnfw::misc::join(backend_list.begin(), backend_list.end(), "/") << std::endl;
+ VERBOSE(Compiler) << "trace_filepath : " << trace_filepath << std::endl;
+ VERBOSE(Compiler) << "graph_dump_level : " << graph_dump_level << std::endl;
+ VERBOSE(Compiler) << "executor : " << executor << std::endl;
+ VERBOSE(Compiler) << "manual backend_for_all : " << manual_scheduler_options.backend_for_all
+ << std::endl;
+ VERBOSE(Compiler) << "manual_scheduler_options : "
+ << getOpBackends(manual_scheduler_options.opcode_to_backend) << std::endl;
+ VERBOSE(Compiler) << "he_scheduler : " << he_scheduler << std::endl;
+ VERBOSE(Compiler) << "he_profiling_mode : " << he_profiling_mode << std::endl;
+ VERBOSE(Compiler) << "fp16_enable : " << fp16_enable << std::endl
+ << std::noboolalpha;
+}
+
+} // namespace compiler
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.cc b/runtime/onert/core/src/compiler/ExecutorFactory.cc
index 062c6c9c3..6a08524cc 100644
--- a/runtime/onert/core/src/compiler/ExecutorFactory.cc
+++ b/runtime/onert/core/src/compiler/ExecutorFactory.cc
@@ -16,26 +16,37 @@
#include "ExecutorFactory.h"
+#include "Linear.h"
+#include "../backend/builtin/BackendContext.h"
+#include "../backend/builtin/Config.h"
+#include "../backend/builtin/UserTensor.h"
+#include "../dumper/text/GraphDumper.h"
+#include "../exec/DataflowExecutor.h"
+#include "../exec/ExecTime.h"
+#include "../exec/ExecutionObservers.h"
+#include "../exec/LinearExecutor.h"
+#ifdef MINMAX_H5DUMPER
+#include "../exec/MinMaxRecorder.h"
+#endif
+#include "../exec/ParallelExecutor.h"
+#include "../ir/OperationCloner.h"
+
+#include <backend/IPortableTensor.h>
+#include <compiler/BackendManager.h>
+#include <compiler/ExecutionBuilder.h>
+#include <util/TracingCtx.h>
+
#include <functional>
-#include "exec/ExecutionObservers.h"
-#include "exec/LinearExecutor.h"
-#include "exec/DataflowExecutor.h"
-#include "exec/ParallelExecutor.h"
-#include "compiler/BackendManager.h"
-#include "compiler/ExecutionBuilder.h"
-#include "exec/ExecTime.h"
-#include "compiler/Linear.h"
-#include "compiler/TensorBuilders.h"
-#include "backend/IConstantInitializer.h"
-#include "backend/IKernelGenerator.h"
-#include "backend/IOptimizer.h"
-#include "backend/ITensorRegister.h"
-#include "backend/controlflow/Config.h"
-#include "backend/controlflow/KernelGenerator.h"
-#include "backend/controlflow/UserTensor.h"
-#include "backend/controlflow/TensorBuilder.h"
#include <memory>
+#ifdef ONERT_TRAIN
+#include "../backend/builtin/train/BackendContext.h"
+#include "../exec/train/TrainableExecutor.h"
+
+#include <backend/train/TrainableBackendContext.h>
+#include <backend/train/ITrainableBackend.h>
+#endif // ONERT_TRAIN
+
namespace onert
{
namespace
@@ -46,7 +57,7 @@ class SyncFunction final : public exec::IFunction
public:
virtual ~SyncFunction() = default;
SyncFunction(std::unique_ptr<exec::IFunction> fn, const std::shared_ptr<backend::IConfig> config)
- : _fn{std::move(fn)}, _config{config}
+ : _fn{std::move(fn)}, _config{config}
{
assert(_fn);
assert(_config);
@@ -65,21 +76,218 @@ private:
std::shared_ptr<backend::IConfig> _config;
};
-// TODO Think of a better way to manage TensorManagers
-backend::TensorManagerSet createTensorManagerSet(const compiler::TensorBuilders &tensor_builders)
+using DeallocList = std::vector<backend::ITensor *>;
+// Deallocates the listed dynamic tensors right after an operation has executed; used by the Linear Executor
+class DeallocFunction final : public exec::IFunction
+{
+public:
+ DeallocFunction(const DeallocList &tensors) : _dealloc_list{tensors} {}
+
+ void run() override
+ {
+ for (auto &&tensor : _dealloc_list)
+ {
+ if (!tensor->is_dynamic())
+ continue;
+ tensor->deallocBuffer();
+ }
+ }
+
+private:
+ DeallocList _dealloc_list;
+};
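+// Illustrative use: the Linear executor appends a DeallocFunction to an operation's function
+// sequence (see dealloc_list_map below) so that dynamic tensors are freed right after their last
+// consumer runs; static tensors are skipped because is_dynamic() is false for them.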
+
+// TODO Unify initializeSubgraphIOTensors
+void initializeSubgraphIOTensors(compiler::ILoweredGraph &lowered_graph,
+ const backend::BackendContexts &backend_contexts,
+ const ir::OperandIndexSequence &indices)
+{
+ // TODO Store builtin backend in BackendContext
+ std::shared_ptr<backend::builtin::TensorRegistry> builtin_tensor_reg;
+ for (const auto &e : backend_contexts)
+ {
+ auto backend = e.first;
+ auto &context = e.second;
+ if (backend->config()->id() == backend::builtin::Config::ID)
+ {
+ builtin_tensor_reg =
+ std::dynamic_pointer_cast<backend::builtin::TensorRegistry>(context->tensor_registry);
+ }
+ }
+ assert(builtin_tensor_reg);
+
+ for (auto &&ind : indices)
+ {
+ const auto &operand = lowered_graph.graph().operands().at(ind);
+ auto tensor = std::make_unique<backend::builtin::IOTensor>(
+ operand.info(),
+ ir::Layout::NHWC /* FIXME find operation for this operand and use frontend_layout */
+ );
+
+ // Add tensor to builtin TensorRegistry.
+ builtin_tensor_reg->setNativeIOTensor(ind, std::move(tensor));
+ }
+}
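+// Note (assumption): the IOTensor objects registered here act as placeholders for the model's
+// inputs/outputs, and the user-provided buffers are expected to be bound to them later, at
+// execution time, by the builtin backend.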
+
+#ifdef ONERT_TRAIN
+void initializeSubgraphIOTensors(compiler::ILoweredGraph &lowered_graph,
+ const backend::train::TrainableBackendContexts &backend_contexts,
+ const ir::OperandIndexSequence &indices)
{
- backend::TensorManagerSet tensor_mgrs;
- for (auto &tensor_builder : tensor_builders)
+ std::shared_ptr<backend::builtin::train::TensorRegistry> builtin_tensor_reg;
+ for (const auto &e : backend_contexts)
{
- auto s_tensor_manager = tensor_builder->releaseStaticTensorManager();
- if (s_tensor_manager != nullptr)
- tensor_mgrs.insert(std::move(s_tensor_manager));
+ auto backend = e.first;
+ auto &context = e.second;
+ if (backend->config()->id() == backend::builtin::Config::ID)
+ {
+ builtin_tensor_reg = std::dynamic_pointer_cast<backend::builtin::train::TensorRegistry>(
+ context->tensor_registry());
+ }
+ }
+ assert(builtin_tensor_reg);
+
+ for (auto &&ind : indices)
+ {
+ const auto &operand = lowered_graph.graph().operands().at(ind);
+ auto tensor = std::make_unique<backend::builtin::IOTensor>(
+ operand.info(),
+ ir::Layout::NHWC /* FIXME find operation for this operand and use frontend_layout */
+ );
+
+ // Add tensor to builtin TensorRegistry.
+ builtin_tensor_reg->setNativeIOTensor(ind, std::move(tensor));
+ }
+}
+#endif // ONERT_TRAIN
+
+backend::BackendContexts
+createBackendContexts(compiler::ILoweredGraph &lgraph, bool linear_executor,
+ std::shared_ptr<backend::custom::IKernelBuilder> custom_kernel_builder)
+{
+ backend::BackendContexts contexts;
+ auto &backend_manager = compiler::BackendManager::get();
+
+ std::unordered_map<const backend::Backend *, backend::ContextData> context_data_map;
+
+ // Generate partial graphs for each backend
+ for (auto &&backend : backend_manager.getAll())
+ {
+ auto &data = context_data_map[backend];
+ auto graph = std::make_unique<ir::Graph>();
+ graph->setLayout(lgraph.graph().layout());
+ data.graph = std::move(graph);
+ }
+
+ auto &whole_graph = lgraph.graph();
+ // Separate operands into partial graphs
+ whole_graph.operands().iterate([&](const ir::OperandIndex &operand_ind, ir::Operand &operand) {
+ auto &operand_li = lgraph.lower_info().operand;
+ const auto &def_factors = operand_li.at(operand_ind).def_factors();
+ if (def_factors.size() == 0) // Ignore unused tensor
+ return;
+ const auto &def_factor = def_factors.getOnlyElement();
+ const auto backend = def_factor.backend();
+ auto &partial_graph = *context_data_map[backend].graph;
+ auto &operand_layouts = context_data_map[backend].operand_layouts;
+ assert(operand_layouts.find(operand_ind) == operand_layouts.end());
+ operand_layouts[operand_ind] = def_factor.layout();
+
+ // Copy the operand and insert it to the partial graph
+ auto new_operand = std::make_unique<ir::Operand>(operand);
+ new_operand->clearDefUse();
+ operand.releaseData(); // Deref data of LoweredGraph
+ auto new_operand_ind = partial_graph.addOperand(operand_ind, std::move(new_operand));
+ UNUSED_RELEASE(new_operand_ind);
+ assert(new_operand_ind == operand_ind);
+ });
+ // Separate operations into partial graphs
+ whole_graph.operations().iterate(
+ [&](const ir::OperationIndex &op_ind, const ir::IOperation &operation) {
+ auto &op_li = lgraph.lower_info().operation;
+ auto backend = op_li.at(op_ind).backend();
+ auto &partial_graph = *context_data_map[backend].graph;
+ auto &external_operands = context_data_map[backend].external_operands;
+ auto &operand_layouts = context_data_map[backend].operand_layouts;
+
+ {
+ // Add missing operands (externals)
+ auto io_list = (operation.getInputs() + operation.getOutputs()) | ir::Remove::DUPLICATED |
+ ir::Remove::UNDEFINED;
+ for (auto &&operand_ind : io_list)
+ {
+ if (partial_graph.operands().exist(operand_ind))
+ continue;
+
+ // Copy the operand and insert it to the partial graph
+ const auto &operand = whole_graph.operands().at(operand_ind);
+ auto new_operand = std::make_unique<ir::Operand>(operand);
+ new_operand->clearDefUse();
+ auto new_operand_ind = partial_graph.addOperand(operand_ind, std::move(new_operand));
+ UNUSED_RELEASE(new_operand_ind);
+ assert(new_operand_ind == operand_ind);
+
+ auto layout =
+ lgraph.lower_info().operand.at(operand_ind).def_factors().getOnlyElement().layout();
+ assert(operand_layouts.find(operand_ind) == operand_layouts.end());
+ operand_layouts[operand_ind] = layout;
+ external_operands.add(operand_ind);
+ }
+
+ auto new_op_ind = partial_graph.addOperation(op_ind, clone(operation));
+ UNUSED_RELEASE(new_op_ind);
+ assert(new_op_ind == op_ind);
+ }
+ });
+
+ // Create contexts
+ auto whole_op_order = lgraph.graph().topolSortOperations();
+ for (auto &&pair : context_data_map)
+ {
+ auto backend = pair.first;
+ auto &data = pair.second;
+ // Handle graph input/outputs or external tensors
+ data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
+ if (whole_graph.getInputs().contains(ind) || whole_graph.getOutputs().contains(ind))
+ data.external_operands.add(ind);
+ // Inputs are either "graph input" or "no def op and non-constant"
+ if (whole_graph.getInputs().contains(ind) ||
+ (!operand.getDef().valid() && !operand.isConstant()))
+ data.graph->addInput(ind);
+ // Outputs are either "graph output" or "no uses"
+ if (whole_graph.getOutputs().contains(ind) || operand.getUses().size() == 0)
+ data.graph->addOutput(ind);
+ });
+ dumper::text::dumpGraph(*data.graph);
+
+ std::copy_if(whole_op_order.begin(), whole_op_order.end(), std::back_inserter(data.op_order),
+ [&](const auto &ind) { return data.graph->operations().exist(ind); });
+ data.is_linear_executor = linear_executor;
+ data.custom_kernel_builder = custom_kernel_builder;
+ contexts.emplace(backend, backend->newContext(std::move(data)));
+ }
+ return contexts;
+}
+
+template <typename Context>
+std::deque<std::pair<const backend::Backend *, Context *>> orderBackendContext(
+ const std::unordered_map<const backend::Backend *, std::unique_ptr<Context>> &tbackend_contexts)
+{
+ std::deque<std::pair<const backend::Backend *, Context *>> ordered_contexts;
- auto d_tensor_manager = tensor_builder->releaseDynamicTensorManager();
- if (d_tensor_manager != nullptr)
- tensor_mgrs.insert(std::move(d_tensor_manager));
+ for (auto &&pair : tbackend_contexts)
+ {
+ // NOTE The builtin backend must be processed last. This is because the Permute layer is the
+ // only operation that may have different ITensor objects for its input and output, and it
+ // requires all other backends' tensors to be ready for use.
+ if (pair.first->config()->id() == "builtin")
+ ordered_contexts.emplace_back(pair.first, pair.second.get());
+ else
+ ordered_contexts.emplace_front(pair.first, pair.second.get());
}
- return tensor_mgrs;
+
+ return ordered_contexts;
}
} // namespace
@@ -106,415 +314,588 @@ ExecutorFactory::ExecutorFactory()
}
exec::IExecutor *ExecutorFactory::create(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map)
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const ExecutorFactoryArgs &args)
{
- return _map.at(options.executor)(std::move(lowered_graph), options, executor_map);
+ assert(args.options != nullptr);
+ return _map.at(args.options->executor)(std::move(lowered_graph), executors, args);
}
-void ExecutorFactory::initializeBackendContext(compiler::LoweredGraph *lowered_graph)
+void ExecutorFactory::prepareMigrantTensors(compiler::ILoweredGraph &lowered_graph,
+ const backend::BackendContexts &backend_contexts)
{
- struct Entry
- {
- std::vector<backend::BackendContext::OperationInfo> operation_list;
- std::vector<ir::OperandIndex> operand_list;
- };
- std::unordered_map<const backend::Backend *, Entry> backend_assets;
-
- // Build lists for operations
- lowered_graph->op_seqs().iterate(
- [&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) {
- auto &op_seq_li = lowered_graph->getLowerInfo()->op_seq;
- auto backend = op_seq_li.at(op_seq_index)->backend();
- for (auto &operation_idx : op_seq.operations())
+ TensorRegistries tensor_regs{backend_contexts, true};
+
+ lowered_graph.graph().operations().iterate(
+ [&](const ir::OperationIndex &op_ind, const ir::IOperation &op) {
+ auto lower_info = lowered_graph.lower_info().operation.getRawPtr(op_ind);
+ auto &backend_ctx = backend_contexts.at(lower_info->backend());
+ for (auto &&ind :
+ (op.getInputs() + op.getOutputs()) | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
+ {
+ // If an Operation's input/output tensor does not have its own tensor object,
+ // it must be using a migrant tensor, so find the tensor in the other tensor registries and
+ // register it to the current tensor registry if it is portable
+ if (!backend_ctx->tensor_registry->getITensor(ind))
{
- backend_assets[backend].operation_list.emplace_back(operation_idx, op_seq.getLayout());
+ auto tensor = tensor_regs.getITensor(ind);
+ assert(tensor); // The tensor must have been registered
+ auto ptensor = dynamic_cast<backend::IPortableTensor *>(tensor);
+ if (ptensor)
+ backend_ctx->tensor_registry->setMigrantTensor(ind, ptensor);
}
- });
+ }
+ });
+}
- // Build lists for operands
- lowered_graph->graph().operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
- const auto lower_info = lowered_graph->getLowerInfo(ind);
- for (auto factor : lower_info->def_factors())
+void ExecutorFactory::prepareBuiltinBackend(const TensorRegistries &tensor_regs,
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const backend::BackendContexts &backend_contexts,
+ const ir::ModelIndex &index)
+{
+ for (auto &&pair : backend_contexts)
+ {
+ auto builtin_context = dynamic_cast<backend::builtin::BackendContext *>(pair.second.get());
+ if (builtin_context != nullptr)
{
- auto backend = factor.backend();
- backend_assets[backend].operand_list.emplace_back(ind);
+ auto builtin_kernel_gen = builtin_context->kernel_gen;
+ builtin_kernel_gen->setTensorRegistries(tensor_regs);
+ builtin_kernel_gen->setExecutors(executors);
+ builtin_kernel_gen->setModelIndex(index);
}
- });
+ }
+}
- for (auto &pair : backend_assets)
+std::deque<std::pair<const backend::Backend *, backend::BackendContext *>>
+ExecutorFactory::orderBackendContext(const backend::BackendContexts &backend_contexts)
+{
+ std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts;
+ for (auto &&pair : backend_contexts)
{
- auto backend = pair.first;
- auto &arg = pair.second;
- lowered_graph->backend_contexts().at(backend)->initialize(arg.operation_list, arg.operand_list);
+ // NOTE The builtin backend must be processed last. This is because the Permute layer is the
+ // only operation that may have different ITensor objects for its input and output, and it
+ // requires all other backends' tensors to be ready for use.
+ if (pair.first->config()->id() == "builtin")
+ ordered_contexts.emplace_back(pair.first, pair.second.get());
+ else
+ ordered_contexts.emplace_front(pair.first, pair.second.get());
}
+ return ordered_contexts;
}
-void ExecutorFactory::runTensorRegistration(compiler::LoweredGraph *lowered_graph,
- const std::vector<ir::OpSequenceIndex> &order)
+exec::IExecutor *
+ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const ExecutorFactoryArgs &args)
{
- for (const auto index : order)
+ const auto options = args.options;
+ const auto &model_index = args.model_index;
+ const auto tracing_ctx = args.tracing_ctx;
+ auto custom_kernel_builder = args.custom_kernel_builder;
+ auto &graph = lowered_graph->graph();
+
+ backend::BackendContexts backend_contexts =
+ createBackendContexts(*lowered_graph, options->executor == "Linear", custom_kernel_builder);
+
+ TensorRegistries tensor_regs{backend_contexts, true};
+
+ initializeSubgraphIOTensors(
+ *lowered_graph, backend_contexts,
+ (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) |
+ ir::Remove::DUPLICATED | ir::Remove::UNDEFINED);
+
+ // linearize
+ auto order = Linear::linearize(*lowered_graph);
+ Linear::dump(*lowered_graph, order);
+
+ for (auto &&pair : backend_contexts)
{
- const auto &op_seq = lowered_graph->op_seqs().at(index);
- const auto backend = lowered_graph->getLowerInfo(index)->backend();
- const auto tensor_register = lowered_graph->backend_contexts().at(backend)->tensor_register;
- auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder;
- auto model_io = lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs();
+ pair.second->genTensors();
+ }
+
+ prepareMigrantTensors(*lowered_graph, backend_contexts);
- if (tensor_register)
+ // Give some runtime objects to builtin KernelGenerator
+ prepareBuiltinBackend(tensor_regs, executors, backend_contexts, model_index);
+
+ ExecutionBuilder builder;
+
+ // Adjust the order of backends for the upcoming iteration
+ auto ordered_contexts = orderBackendContext(backend_contexts);
+
+ // Simulate the execution for deallocation of tensors
+ std::unordered_map<ir::OperationIndex, DeallocList> dealloc_list_map;
+ {
+ ir::OperandIndexMap<uint32_t> uses_map;
+ ir::OperandIndexSequence constants;
+
+ auto model_io =
+ (graph.getInputs() + graph.getOutputs()) | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+
+ // Prepare scanning
+ graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+ uses_map[ind] = obj.getUses().size();
+
+ if (obj.isConstant())
+ constants.append(ind);
+ });
+
+ // A trick to consider constants as an exception
+ for (const auto &ind : constants)
{
- // Custom registration
- tensor_register->registerTensors(op_seq, lowered_graph->getLowerInfo());
+ uses_map[ind]++;
}
- else
+
+ for (const auto &op_ind : order)
{
- // Default registration
- for (const auto op_idx : op_seq)
+ const auto &op = graph.operations().at(op_ind);
+ auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+ auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+
+ for (const auto &ind : op_inputs)
{
- const auto &op = lowered_graph->graph().operations().at(op_idx);
- for (const auto &index : (op.getInputs() | ir::Remove::UNDEFINED) + op.getOutputs())
+ const auto &operand = graph.operands().at(ind);
+ assert(uses_map.find(ind) != uses_map.end());
+ assert(uses_map[ind] > 0);
+ uses_map[ind]--;
+ if (uses_map[ind] == 0 && !operand.info().isVariable() && !model_io.contains(ind))
{
- if (!tensor_builder->isRegistered(index) && !model_io.contains(index))
- {
- const auto &operand_lower_info =
- lowered_graph->getLowerInfo(index)->def_factors().getOnlyElement();
-
- // E.g., permute (CPU) -> tensor A -> MaxPool2D(acl_cl)
- // op.getOutputs() of permute (CPU) returns tensor A
- // but tensor A belongs to the backend of acl_cl.
- // So, we have to make this tensor NOT registered for CPU.
- if (operand_lower_info.backend() != backend)
- continue;
-
- const auto &obj = lowered_graph->graph().operands().at(index);
- const auto frontend_layout = op_seq.getLayout();
- const auto backend_layout = operand_lower_info.layout();
- ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
- obj.typeInfo(), obj.info().memAllocType(),
- obj.isConstant()};
- tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
- }
+ dealloc_list_map[op_ind].emplace_back(tensor_regs.getITensor(ind));
}
}
}
- }
-}
-std::vector<std::shared_ptr<backend::ITensor>>
-ExecutorFactory::initializeModelIOTensors(compiler::LoweredGraph &lowered_graph,
- const ir::OperandIndexSequence &indices)
-{
- std::vector<std::shared_ptr<backend::ITensor>> ret;
+ // Dispose and validate
+ for (const auto &ind : constants)
+ {
+ --uses_map[ind];
+ }
- // TODO Store controlflow backend in BackendContext
- std::shared_ptr<backend::controlflow::TensorBuilder> cf_tensor_builder;
- std::shared_ptr<backend::controlflow::TensorRegistry> cf_tensor_reg;
- for (const auto &e : lowered_graph.backend_contexts())
+ assert(
+ std::all_of(uses_map.begin(), uses_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+ }
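+ // Illustrative walk-through of the simulation above: if an operand is consumed only by, say,
+ // operation #3 (indices are hypothetical), its uses_map entry reaches 0 while #3 is scanned, so
+ // its tensor is queued in dealloc_list_map[#3] and will be freed by a DeallocFunction right
+ // after that operation runs. Constants never reach 0 during the scan (they were incremented
+ // beforehand), and model inputs/outputs are explicitly excluded.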
+
+ // Generate kernels
+ for (auto &&pair : ordered_contexts)
{
- auto backend = e.first;
- auto &context = e.second;
- if (backend->config()->id() == backend::controlflow::Config::ID)
+ auto codes = pair.second->genKernels();
+ for (auto &&pair : codes)
{
- cf_tensor_builder =
- std::dynamic_pointer_cast<backend::controlflow::TensorBuilder>(context->tensor_builder);
- cf_tensor_reg =
- std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(context->tensor_registry);
+ auto &op_ind = pair.first;
+ auto &fn_seq = pair.second;
+ auto &op = lowered_graph->graph().operations().at(op_ind);
+ auto lower_info = lowered_graph->lower_info().operation.getRawPtr(op_ind);
+ if (options->he_profiling_mode)
+ fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
+ if (!dealloc_list_map[op_ind].empty())
+ fn_seq->append(std::make_unique<DeallocFunction>(dealloc_list_map[op_ind]));
+ builder.append(op_ind, {op_ind, &op, lower_info, std::move(fn_seq)});
}
}
- assert(cf_tensor_builder);
- assert(cf_tensor_reg);
- for (auto ind : indices)
+ auto code_map = builder.releaseCodeMap();
+
+ auto exec = new exec::LinearExecutor{std::move(lowered_graph),
+ std::move(backend_contexts),
+ tensor_regs,
+ std::move(code_map),
+ order,
+ tracing_ctx};
+
+ if (!options->trace_filepath.empty())
{
- const auto &operand = lowered_graph.graph().operands().at(ind);
- auto tensor = std::make_shared<backend::controlflow::UserTensor>(
- operand.info(),
- ir::Layout::NHWC, /* FIXME find op_seq for this operand and use frontend_layout */
- cf_tensor_builder->dynamicTensorManager());
-
- // Add tensor to controlflow TensorRegistry.
- cf_tensor_reg->setNativeUserTensor(ind, tensor);
- ret.push_back(tensor);
+ std::unique_ptr<exec::IExecutionObserver> ctp =
+ std::make_unique<exec::TracingObserver>(options->trace_filepath, exec->graph(), tracing_ctx);
+ exec->addObserver(std::move(ctp));
}
- return ret;
-}
+#ifdef MINMAX_H5DUMPER
+ if (!options->minmax_filepath.empty())
+ exec->addObserver(std::make_unique<exec::MinMaxRecorder>(
+ options->minmax_filepath, exec->graph(), exec->getBackendContexts()));
+#endif
-void ExecutorFactory::prepareExternalTensors(compiler::LoweredGraph &lowered_graph)
-{
- TensorRegistries tensor_regs{lowered_graph.backend_contexts(), true};
-
- lowered_graph.op_seqs().iterate(
- [&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) {
- auto lower_info = lowered_graph.getLowerInfo(op_seq_index);
- auto &backend_ctx = lowered_graph.backend_contexts().at(lower_info->backend());
- for (auto ind : (op_seq.getInputs() + op_seq.getOutputs()) | ir::Remove::DUPLICATED |
- ir::Remove::UNDEFINED)
- {
- // If an OpSequence input/output tensor does not have a own tensor object,
- // it must be using external tensors, so find the tensor from other tensor builders and
- // set the tensor to this tensor builder if portable
- if (!backend_ctx->tensor_registry->getITensor(ind))
- {
- auto tensor = tensor_regs.getITensor(ind);
- assert(tensor); // The tensor must have been registered
- auto ptensor = std::dynamic_pointer_cast<backend::IPortableTensor>(tensor);
- if (ptensor)
- backend_ctx->tensor_registry->setMigrantTensor(ind, ptensor);
- }
- }
- });
+ return exec;
}
exec::IExecutor *
-ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map)
+ExecutorFactory::createDataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const ExecutorFactoryArgs &args, bool parallel)
{
- const auto &backend_contexts = lowered_graph->backend_contexts();
+ const auto options = args.options;
+ const auto &model_index = args.model_index;
+ const auto tracing_ctx = args.tracing_ctx;
+ auto custom_kernel_builder = args.custom_kernel_builder;
- initializeBackendContext(lowered_graph.get());
+ backend::BackendContexts backend_contexts =
+ createBackendContexts(*lowered_graph, options->executor == "Linear", custom_kernel_builder);
- // linearize
- assert(!lowered_graph->graph().isBuildingPhase());
+ TensorRegistries tensor_regs{backend_contexts, true};
- /*************************************************
- * Backend dependent analysis & optimization phase
- *************************************************/
+ initializeSubgraphIOTensors(
+ *lowered_graph, backend_contexts,
+ (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) |
+ ir::Remove::DUPLICATED | ir::Remove::UNDEFINED);
- for (auto &pair : backend_contexts)
+ for (auto &&pair : backend_contexts)
{
- auto &optimizer = pair.second->optimizer;
- if (optimizer)
- optimizer->optimize();
+ pair.second->genTensors();
}
- /**********************************************************
- * Backend dependent analysis & optimization phase finished
- **********************************************************/
+ prepareMigrantTensors(*lowered_graph, backend_contexts);
- /***********************
- * Code generation phase
- ***********************/
+ // Give some runtime objects to builtin KernelGenerator
+ prepareBuiltinBackend(tensor_regs, executors, backend_contexts, model_index);
- auto order = Linear::linearize(*lowered_graph);
- runTensorRegistration(lowered_graph.get(), order);
+ ExecutionBuilder builder;
- std::vector<std::shared_ptr<backend::ITensor>> input_tensors;
- std::vector<std::shared_ptr<backend::ITensor>> output_tensors;
- if (options.is_primary_subgraph)
+ // Adjust the order of backends for the upcoming iteration
+ auto ordered_contexts = orderBackendContext(backend_contexts);
+
+ // Generate kernels
+ for (auto &&pair : ordered_contexts)
{
- input_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getInputs());
- output_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getOutputs());
+ auto codes = pair.second->genKernels();
+ for (auto &&pair : codes)
+ {
+ auto &op_ind = pair.first;
+ auto &fn_seq = pair.second;
+ auto &op = lowered_graph->graph().operations().at(op_ind);
+ auto lower_info = lowered_graph->lower_info().operation.getRawPtr(op_ind);
+ if (options->he_profiling_mode)
+ fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
+ builder.append(op_ind, {op_ind, &op, lower_info, std::move(fn_seq)});
+ }
}
- Linear::dump(*lowered_graph, order);
- Linear::planTensors(*lowered_graph, order);
+ auto code_map = builder.releaseCodeMap();
- TensorBuilders tensor_builders{lowered_graph->backend_contexts(), true};
- TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};
+ exec::ExecutorBase *exec = nullptr;
+ if (parallel)
+ {
+ exec = new exec::ParallelExecutor{std::move(lowered_graph), std::move(backend_contexts),
+ tensor_regs, std::move(code_map), tracing_ctx};
+ }
+ else
+ {
+ auto dataflow_exec =
+ new exec::DataflowExecutor{std::move(lowered_graph), std::move(backend_contexts), tensor_regs,
+ std::move(code_map), tracing_ctx};
+ if (options->he_profiling_mode)
+ {
+ std::vector<const backend::Backend *> backends;
+ for (const auto &pair : backend_contexts)
+ {
+ backends.push_back(pair.first);
+ }
+ auto et = std::make_shared<exec::ExecTime>(backends);
+ std::unique_ptr<exec::IExecutionObserver> obs =
+ std::make_unique<exec::ProfileObserver>(et, dataflow_exec->graph());
+ dataflow_exec->addObserver(std::move(obs));
+ }
+ exec = dataflow_exec;
+ }
- for (auto &tensor_builder : tensor_builders)
+ if (!options->trace_filepath.empty())
{
- tensor_builder->prepare();
+ std::unique_ptr<exec::IExecutionObserver> ctp =
+ std::make_unique<exec::TracingObserver>(options->trace_filepath, exec->graph(), tracing_ctx);
+ exec->addObserver(std::move(ctp));
}
- prepareExternalTensors(*lowered_graph);
+ return exec;
+}
- ExecutionBuilder builder;
+#ifdef ONERT_TRAIN
+exec::IExecutor *
+ExecutorFactory::create(std::unique_ptr<compiler::train::LoweredTrainableGraph> lowered_graph,
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const ExecutorFactoryArgs &args,
+ const std::shared_ptr<exec::train::optimizer::Optimizer> &optimizer)
+{
+ assert(args.options != nullptr);
- // Generate kernels
- lowered_graph->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &op_seq_index,
- const ir::OpSequence &op_seq) {
- auto lower_info = lowered_graph->getLowerInfo(op_seq_index);
- auto kernel_gen = lowered_graph->backend_contexts().at(lower_info->backend())->kernel_gen;
- // Set TensorBuilderSet and ExecutorMap to kernel_gen of control flow
- auto cf_kernel_gen = dynamic_cast<backend::controlflow::KernelGenerator *>(kernel_gen.get());
- if (cf_kernel_gen != nullptr)
+ if (args.options->executor != "Linear")
+ throw std::runtime_error("ExecutorFactory: TrainableExecutor supports only 'Linear' now");
+
+ return createTrainableExecutor(std::move(lowered_graph), executors, args, optimizer);
+}
+
+void ExecutorFactory::prepareMigrantTensors(
+ compiler::ILoweredGraph &lowered_graph,
+ const backend::train::TrainableBackendContexts &backend_contexts)
+{
+ train::TensorRegistries tensor_regs{backend_contexts, true};
+
+ lowered_graph.graph().operations().iterate(
+ [&](const ir::OperationIndex &op_ind, const ir::IOperation &op) {
+ auto lower_info = lowered_graph.lower_info().operation.getRawPtr(op_ind);
+ auto &backend_ctx = backend_contexts.at(lower_info->backend());
+ for (auto &&ind :
+ (op.getInputs() + op.getOutputs()) | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
+ {
+ // If an Operation's input/output tensor does not have its own tensor object,
+ // it must be using a migrant tensor, so find the tensor in the other tensor registries and
+ // register it to the current tensor registry if it is portable
+ if (!backend_ctx->tensor_registry()->getITensor(ind))
+ {
+ auto tensor = tensor_regs.getITensor(ind);
+ assert(tensor); // The tensor must have been registered
+ auto ptensor = dynamic_cast<backend::IPortableTensor *>(tensor);
+ if (ptensor)
+ backend_ctx->tensor_registry()->setMigrantTensor(ind, ptensor);
+ }
+ }
+ });
+}
+
+exec::IExecutor *ExecutorFactory::createTrainableExecutor(
+ std::unique_ptr<compiler::train::LoweredTrainableGraph> lowered_graph,
+ const std::shared_ptr<exec::IExecutors> &, const ExecutorFactoryArgs &args,
+ const std::shared_ptr<exec::train::optimizer::Optimizer> &optimizer)
+{
+ const auto options = args.options;
+ const auto tracing_ctx = args.tracing_ctx;
+ auto custom_kernel_builder = args.custom_kernel_builder;
+
+ auto &graph = lowered_graph->graph();
+
+ lowered_graph->trainable_graph().operations().iterate([](const onert::ir::OperationIndex &,
+ const onert::ir::IOperation &op) {
+ try
{
- cf_kernel_gen->setTensorRegistries(tensor_regs);
- cf_kernel_gen->setExecutorMap(executor_map);
+ UNUSED_RELEASE(dynamic_cast<const ir::train::ITrainableOperation &>(op));
}
- auto fn_seq = kernel_gen->generate(op_seq);
- if (options.he_profiling_mode)
+ catch (std::bad_cast &)
{
- fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
+ throw std::runtime_error("ExecutorFactory: " + op.name() + " is not trainable operation yet");
}
- builder.append(op_seq_index, {&op_seq, lower_info, std::move(fn_seq)});
});
- for (auto &tensor_builder : tensor_builders)
- {
- tensor_builder->allocate();
- }
+ // TODO Create context only once instead of replacing
+ backend::train::TrainableBackendContexts tbackend_contexts;
+ backend::BackendContexts base_backend_contexts =
+ createBackendContexts(*lowered_graph, true, custom_kernel_builder);
- for (auto &pair : backend_contexts)
+ // Replace BackendContext with TrainableBackendContext
+ for (auto &&pair : base_backend_contexts)
{
- pair.second->initConsts();
- }
-
- lowered_graph->graph().operands().iterate(
- [](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
-
- auto code_map = builder.releaseCodeMap();
-
- for (auto &it : code_map)
- {
- auto op_seq_index = it.first;
- auto &fn_seq = it.second.fn_seq;
-
- fn_seq->iterate([&](exec::IFunction &ifunc) {
- ifunc.prepare();
- auto backend = lowered_graph->getLowerInfo(op_seq_index)->backend();
- auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder;
- tensor_builder->postFunctionPrepare();
+ auto ctx = pair.second.get();
+ const auto &data = ctx->data();
+
+ // Create partial and trainable graphs
+ auto tgraph = std::make_unique<ir::train::TrainableGraph>(*data.graph);
+ data.graph->operations().iterate(
+ [&](const onert::ir::OperationIndex &op_index, const onert::ir::IOperation &) {
+ const auto &orig_tgraph = lowered_graph->trainable_graph();
+ const auto &trainable_op = orig_tgraph.operation(op_index);
+ auto gen_index = tgraph->replaceOperation(op_index, trainable_op.clone());
+ UNUSED_RELEASE(gen_index);
+ assert(gen_index == op_index);
+ });
+ data.graph->operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &) {
+ const auto &orig_tgraph = lowered_graph->trainable_graph();
+ if (orig_tgraph.derivatives().exist(index))
+ {
+ const auto &deriv = orig_tgraph.derivatives().at(index);
+ auto new_deriv = std::make_unique<ir::Operand>(deriv);
+ auto gen_index = tgraph->addDerivative(index, std::move(new_deriv));
+ UNUSED_RELEASE(gen_index);
+ assert(gen_index == index);
+ }
});
- }
- backend::TensorManagerSet tensor_mgrs = createTensorManagerSet(tensor_builders);
- auto exec = new exec::LinearExecutor{
- std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
- std::move(tensor_mgrs), std::move(code_map), order};
+ // Remove outputs of whole graph from external_operands
+ auto external_operands = data.external_operands;
+ for (const auto &index : lowered_graph->trainable_graph().getOutputs())
+ {
+ if (external_operands.contains(index))
+ external_operands.remove(index);
+ }
- if (!options.trace_filepath.empty())
- {
- std::unique_ptr<exec::IExecutionObserver> ctp =
- std::make_unique<exec::ChromeTracingObserver>(options.trace_filepath, exec->graph());
- exec->addObserver(std::move(ctp));
+ // Set trainable context data
+ backend::train::TrainableContextData tdata;
+ tdata.tgraph = std::move(tgraph);
+ tdata.op_order = std::move(data.op_order);
+ tdata.external_operands = std::move(external_operands);
+ tdata.operand_layouts = std::move(data.operand_layouts);
+ tdata.custom_kernel_builder = std::move(data.custom_kernel_builder);
+ tdata.is_linear_executor = data.is_linear_executor;
+ tdata.optimizer = optimizer;
+
+ // TODO Remove dynamic_cast
+ try
+ {
+ const auto backend = pair.first;
+ const auto tbackend = dynamic_cast<const backend::train::ITrainableBackend *>(backend);
+ tbackend_contexts.emplace(backend, tbackend->newContext(std::move(tdata)));
+ }
+ catch (const std::bad_cast &)
+ {
+ throw std::runtime_error("ExecutorFactory: Invalid backend - TrainableExecutor does not "
+ "support non-trainble backends");
+ }
}
+ base_backend_contexts.clear();
- return exec;
-}
-
-exec::IExecutor *ExecutorFactory::createDataflowExecutor(
- std::unique_ptr<compiler::LoweredGraph> lowered_graph, const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel)
-{
- const auto &backend_contexts = lowered_graph->backend_contexts();
+ train::TensorRegistries tensor_regs{tbackend_contexts, true};
- initializeBackendContext(lowered_graph.get());
+ initializeSubgraphIOTensors(
+ *lowered_graph, tbackend_contexts,
+ (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) |
+ ir::Remove::DUPLICATED | ir::Remove::UNDEFINED);
+ // linearize
auto order = Linear::linearize(*lowered_graph);
- runTensorRegistration(lowered_graph.get(), order);
+ Linear::dump(*lowered_graph, order);
- std::vector<std::shared_ptr<backend::ITensor>> input_tensors;
- std::vector<std::shared_ptr<backend::ITensor>> output_tensors;
- if (options.is_primary_subgraph)
+ for (auto &&pair : tbackend_contexts)
{
- input_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getInputs());
- output_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getOutputs());
+ pair.second->genTensors();
}
- TensorBuilders tensor_builders{lowered_graph->backend_contexts(), true};
- TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};
-
- // To make tensors never be deallocated, this is a workaround to use static memory planner
- for (auto &tensor_builder : tensor_builders)
+ for (auto &&pair : tbackend_contexts)
{
- lowered_graph->graph().operands().iterate(
- [&](const ir::OperandIndex &ind, const ir::Operand &) {
- if (tensor_builder->isRegistered(ind))
- {
- tensor_builder->notifyFirstUse(ind);
- }
- });
+ auto tctx = pair.second.get();
+ tctx->genTrainingTensors();
}
- for (auto &tensor_builder : tensor_builders)
+ prepareMigrantTensors(*lowered_graph, tbackend_contexts);
+
+ // Give some runtime objects to builtin KernelGenerator
+ for (auto &&pair : tbackend_contexts)
{
- tensor_builder->prepare();
+ auto builtin_context =
+ dynamic_cast<backend::builtin::train::BackendContext *>(pair.second.get());
+ if (builtin_context != nullptr)
+ {
+ auto builtin_kernel_gen = builtin_context->kernel_gen;
+ builtin_kernel_gen->setTensorRegistries(tensor_regs);
+ builtin_kernel_gen->setWholeGraphOutputs(lowered_graph->trainable_graph().getOutputs());
+ }
}
- prepareExternalTensors(*lowered_graph);
+ // Adjust the order of backends for the upcoming iteration
+ auto ordered_contexts =
+ onert::orderBackendContext<backend::train::TrainableBackendContext>(tbackend_contexts);
- ExecutionBuilder builder;
+ // TODO Remove this simulation
+ // Simulate the execution for deallocation of tensors
+ std::unordered_map<ir::OperationIndex, DeallocList> dealloc_list_map;
+ {
+ ir::OperandIndexMap<uint32_t> uses_map;
+ ir::OperandIndexSequence constants;
- // Generate kernels
- lowered_graph->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &op_seq_index,
- const ir::OpSequence &op_seq) {
- auto lower_info = lowered_graph->getLowerInfo(op_seq_index);
- auto kernel_gen = lowered_graph->backend_contexts().at(lower_info->backend())->kernel_gen;
- // Set TensorBuilderSet and ExecutorMap to kernel_gen of control flow
- auto cf_kernel_gen = dynamic_cast<backend::controlflow::KernelGenerator *>(kernel_gen.get());
- if (cf_kernel_gen != nullptr)
+ auto model_io =
+ (graph.getInputs() + graph.getOutputs()) | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+
+ // Prepare scanning
+ graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+ uses_map[ind] = obj.getUses().size();
+
+ if (obj.isConstant())
+ constants.append(ind);
+ });
+
+ // A trick to consider constants as an exception
+ for (const auto &ind : constants)
{
- assert(cf_kernel_gen != nullptr);
- cf_kernel_gen->setTensorRegistries(tensor_regs);
- cf_kernel_gen->setExecutorMap(executor_map);
+ uses_map[ind]++;
}
- auto fn_seq = kernel_gen->generate(op_seq);
- if (options.he_profiling_mode)
+
+ for (const auto op_ind : order)
{
- fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
+ const auto &op = graph.operations().at(op_ind);
+ auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+ auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+
+ for (const auto &ind : op_inputs)
+ {
+ const auto &operand = graph.operands().at(ind);
+ assert(uses_map.find(ind) != uses_map.end());
+ assert(uses_map[ind] > 0);
+ uses_map[ind]--;
+ if (uses_map[ind] == 0 && !operand.info().isVariable() && !model_io.contains(ind))
+ {
+ dealloc_list_map[op_ind].emplace_back(tensor_regs.getITensor(ind));
+ }
+ }
}
- builder.append(op_seq_index, {&op_seq, lower_info, std::move(fn_seq)});
- });
- for (const auto &tensor_builder : tensor_builders)
- {
- tensor_builder->allocate();
- }
+ // Dispose and validate
+ for (const auto &ind : constants)
+ {
+ --uses_map[ind];
+ }
- for (auto &pair : backend_contexts)
- {
- pair.second->initConsts();
+ assert(
+ std::all_of(uses_map.begin(), uses_map.end(),
+ [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
}
- lowered_graph->graph().operands().iterate(
- [](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
-
- auto code_map = builder.releaseCodeMap();
-
- for (auto &it : code_map)
+ // Check derivative tensors
{
- auto op_seq_index = it.first;
- auto &fn_seq = it.second.fn_seq;
-
- fn_seq->iterate([&](exec::IFunction &ifunc) {
- ifunc.prepare();
- auto backend = lowered_graph->getLowerInfo(op_seq_index)->backend();
- auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder;
- tensor_builder->postFunctionPrepare();
- });
+ // TODO Support multiple subgraphs
+ // Check that the derivative tensors corresponding to the model's inputs are nullptr
+ // NOTE The derivative tensors corresponding to the model's inputs belong to the inputs of
+ // PermuteLayers and are nullptr because they are meaningless.
+ assert(std::all_of(lowered_graph->trainable_graph().getInputs().begin(),
+ lowered_graph->trainable_graph().getInputs().end(),
+ [&](const auto &input_idx) {
+ return tensor_regs.getDerivativeITensor(input_idx) == nullptr;
+ }));
+
+ // Check if the derivative tensors corresponding to outputs of model exist
+ assert(std::all_of(lowered_graph->trainable_graph().getOutputs().begin(),
+ lowered_graph->trainable_graph().getOutputs().end(),
+ [&](const auto &output_idx) {
+ return tensor_regs.getDerivativeITensor(output_idx) != nullptr;
+ }));
}
- backend::TensorManagerSet tensor_mgrs = createTensorManagerSet(tensor_builders);
-
- exec::ExecutorBase *exec = nullptr;
- if (parallel)
- {
- exec = new exec::ParallelExecutor{std::move(lowered_graph), input_tensors,
- output_tensors, tensor_regs,
- std::move(tensor_mgrs), std::move(code_map)};
- }
- else
+ train::TrainableCodeMap code_map;
+ // Generate kernels
+ for (auto &&pair : ordered_contexts)
{
- auto dataflow_exec = new exec::DataflowExecutor{std::move(lowered_graph), input_tensors,
- output_tensors, tensor_regs,
- std::move(tensor_mgrs), std::move(code_map)};
- if (options.he_profiling_mode)
+ auto codes = pair.second->genKernels();
+ for (auto &&pair : codes)
{
- std::vector<const backend::Backend *> backends;
- for (const auto &pair : backend_contexts)
- {
- backends.push_back(pair.first);
- }
- auto et = std::make_shared<exec::ExecTime>(backends);
- std::unique_ptr<exec::IExecutionObserver> obs =
- std::make_unique<exec::ProfileObserver>(et, dataflow_exec->graph());
- dataflow_exec->addObserver(std::move(obs));
+ auto &op_ind = pair.first;
+ auto &tn_seq = pair.second;
+ auto &op = lowered_graph->trainable_graph().operation(op_ind);
+ auto lower_info = lowered_graph->lower_info().operation.getRawPtr(op_ind);
+
+ assert(code_map.find(op_ind) == code_map.end());
+ code_map.insert(
+ {op_ind, train::TrainableCodeAndInfo{op_ind, &op, lower_info, std::move(tn_seq)}});
}
- exec = dataflow_exec;
}
- if (!options.trace_filepath.empty())
+ if (order.size() != code_map.size())
+ {
+ throw std::runtime_error("ExecutorFactory: Some kernels are not generated");
+ }
+
+ auto exec = new exec::train::TrainableExecutor{std::move(lowered_graph),
+ std::move(tbackend_contexts),
+ tensor_regs,
+ std::move(code_map),
+ order,
+ tracing_ctx};
+
+ if (!options->trace_filepath.empty())
{
std::unique_ptr<exec::IExecutionObserver> ctp =
- std::make_unique<exec::ChromeTracingObserver>(options.trace_filepath, exec->graph());
+ std::make_unique<exec::TracingObserver>(options->trace_filepath, exec->graph(), tracing_ctx);
exec->addObserver(std::move(ctp));
}
+ // TODO Support MINMAX_H5DUMPER
return exec;
}
+#endif // ONERT_TRAIN
} // namespace compiler
} // namespace onert
diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.h b/runtime/onert/core/src/compiler/ExecutorFactory.h
index b8893c03b..cc621bccf 100644
--- a/runtime/onert/core/src/compiler/ExecutorFactory.h
+++ b/runtime/onert/core/src/compiler/ExecutorFactory.h
@@ -17,18 +17,37 @@
#ifndef __ONERT_COMPILER_EXECUTOR_FACTORY_H__
#define __ONERT_COMPILER_EXECUTOR_FACTORY_H__
-#include <unordered_map>
+#include "TensorRegistries.h"
#include "backend/ITensor.h"
-#include "exec/IExecutor.h"
+
+#ifdef ONERT_TRAIN
+#include "backend/train/TrainableBackendContext.h"
+#endif // ONERT_TRAIN
#include "compiler/LoweredGraph.h"
-#include "TensorRegistries.h"
+#ifdef ONERT_TRAIN
+#include "compiler/train/LoweredTrainableGraph.h"
+#include "exec/train/optimizer/Optimizer.h"
+#endif // ONERT_TRAIN
+#include "exec/IExecutors.h"
+
+#include <deque>
+#include <unordered_map>
namespace onert
{
namespace compiler
{
+// TODO Change to a better name
+struct ExecutorFactoryArgs
+{
+ const util::TracingCtx *tracing_ctx;
+ const compiler::CompilerOptions *options;
+ ir::ModelIndex model_index;
+ std::shared_ptr<backend::custom::IKernelBuilder> custom_kernel_builder;
+};
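+// Note (illustrative): callers are expected to fill this from the compiler state, roughly:
+//   ExecutorFactoryArgs args;
+//   args.tracing_ctx = tracing_ctx;
+//   args.options = options.get();   // must not be null; create() asserts this
+//   args.model_index = model_index;
+//   args.custom_kernel_builder = custom_kernel_builder;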
+
class ExecutorFactory
{
public:
@@ -36,35 +55,56 @@ public:
public:
exec::IExecutor *create(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map);
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const ExecutorFactoryArgs &args);
+
+#ifdef ONERT_TRAIN
+ // TODO Unify create()
+ exec::IExecutor *create(std::unique_ptr<compiler::train::LoweredTrainableGraph> lowered_graph,
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const ExecutorFactoryArgs &args,
+ const std::shared_ptr<exec::train::optimizer::Optimizer> &optimizer);
+#endif // ONERT_TRAIN
private:
ExecutorFactory();
private:
- static void initializeBackendContext(compiler::LoweredGraph *lowered_graph);
- static void runTensorRegistration(compiler::LoweredGraph *lowered_graph,
- const std::vector<ir::OpSequenceIndex> &order);
- static std::vector<std::shared_ptr<backend::ITensor>>
- initializeModelIOTensors(compiler::LoweredGraph &lowered_graph,
- const ir::OperandIndexSequence &indices);
- static void prepareExternalTensors(compiler::LoweredGraph &lowered_graph);
+ static void prepareMigrantTensors(compiler::ILoweredGraph &lowered_graph,
+ const backend::BackendContexts &backend_contexts);
+ static void prepareBuiltinBackend(const TensorRegistries &tensor_regs,
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const backend::BackendContexts &backend_contexts,
+ const ir::ModelIndex &index);
+ static std::deque<std::pair<const backend::Backend *, backend::BackendContext *>>
+ orderBackendContext(const backend::BackendContexts &backend_contexts);
+
static exec::IExecutor *
createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map);
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const ExecutorFactoryArgs &args);
static exec::IExecutor *
createDataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel);
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const ExecutorFactoryArgs &args, bool parallel);
+#ifdef ONERT_TRAIN
+ // TODO Unify prepareMigrantTensors
+ static void
+ prepareMigrantTensors(compiler::ILoweredGraph &lowered_graph,
+ const backend::train::TrainableBackendContexts &backend_contexts);
+ static exec::IExecutor *
+ createTrainableExecutor(std::unique_ptr<compiler::train::LoweredTrainableGraph> lowered_graph,
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const ExecutorFactoryArgs &args,
+ const std::shared_ptr<exec::train::optimizer::Optimizer> &optimizer);
+#endif // ONERT_TRAIN
private:
- std::unordered_map<std::string, std::function<exec::IExecutor *(
- std::unique_ptr<compiler::LoweredGraph>,
- const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map)>>
- _map;
+ std::unordered_map<
+ std::string, std::function<exec::IExecutor *(std::unique_ptr<compiler::LoweredGraph>,
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const ExecutorFactoryArgs &args)>>
+ _map;
};
} // namespace compiler
diff --git a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc
index 23a6a253d..ce9b09c2d 100644
--- a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc
+++ b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc
@@ -14,6 +14,8 @@
* limitations under the License.
*/
+#if 0 // This file is temporarily unused
+
#include "Fp32ToFp16Converter.h"
#include "ir/operation/ConvertFp32ToFp16.h"
#include "ir/operation/ConvertFp16ToFp32.h"
@@ -45,7 +47,7 @@ namespace compiler
{
Fp32ToFp16Converter::Fp32ToFp16Converter(compiler::LoweredGraph &lowered_graph)
- : _lowered_graph{lowered_graph}
+ : _lowered_graph{lowered_graph}
{
VERBOSE(Fp32ToFp16Converter) << "Fp16 Enable on" << std::endl;
}
@@ -177,26 +179,26 @@ void Fp32ToFp16Converter::run()
void Fp32ToFp16Converter::appendOpSequences()
{
_lowered_graph.op_seqs().iterate(
- [&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) {
- const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
- assert(lower_info != nullptr);
-
- // For now, the only acl_cl supports fully fp16 type
- // TODO Support fp16 on acl_neon. Current acl_neon supports the only reshape and concat
- // operations.
- // To do this, we could check the support by `operation by operation`. After that, we
- // would partition an op_seq if it contains unsupported operations.
- if (lower_info->backend()->config()->id() != kAclClBackendConfigId)
- return;
-
- // OpSeq's input set should be included in the first operation's input set or
- // OpSeq's output set should be included in the last operation's output set
- assert(checkOperandsOfOpSequence(op_seq));
-
- // Append converting OpSequence for fp16 but all operands' types are not fp16 still.
- appendNewOpSeqForConvertFp32ToFp16(op_seq_ind, op_seq);
- appendNewOpSeqForConvertFp16ToFp32(op_seq_ind, op_seq);
- });
+ [&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) {
+ const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
+ assert(lower_info != nullptr);
+
+ // For now, only acl_cl fully supports the fp16 type
+ // TODO Support fp16 on acl_neon. Currently acl_neon supports only the reshape and concat
+ // operations.
+ // To do this, we could check the support operation by operation. After that, we
+ // would partition an op_seq if it contains unsupported operations.
+ if (lower_info->backend()->config()->id() != kAclClBackendConfigId)
+ return;
+
+ // OpSeq's input set should be included in the first operation's input set or
+ // OpSeq's output set should be included in the last operation's output set
+ assert(checkOperandsOfOpSequence(op_seq));
+
+ // Append converting OpSequences for fp16; the operands' types are not converted to fp16 yet.
+ appendNewOpSeqForConvertFp32ToFp16(op_seq_ind, op_seq);
+ appendNewOpSeqForConvertFp16ToFp32(op_seq_ind, op_seq);
+ });
}
//
@@ -253,7 +255,7 @@ void Fp32ToFp16Converter::appendNewOpSeqForConvertFp32ToFp16(const ir::OpSequenc
const auto new_op_seq_ind = newOpSequence(op_seq_ind, new_node_ind);
// set new lower_info for op_seq
- setNewOpSequenceLowerInfo(op_seq_ind, new_op_seq_ind);
+ setNewOperationLowerInfo(op_seq_ind, new_op_seq_ind);
_list_fp32_to_fp16.insert(new_op_seq_ind);
@@ -326,7 +328,7 @@ void Fp32ToFp16Converter::appendNewOpSeqForConvertFp16ToFp32(const ir::OpSequenc
auto new_op_seq_ind = newOpSequence(op_seq_ind, new_node_ind);
// set new lower_info for op_seq
- setNewOpSequenceLowerInfo(op_seq_ind, new_op_seq_ind);
+ setNewOperationLowerInfo(op_seq_ind, new_op_seq_ind);
_list_fp16_to_fp32.insert(new_op_seq_ind);
@@ -372,16 +374,16 @@ void Fp32ToFp16Converter::optimize()
void Fp32ToFp16Converter::convertOperands()
{
_lowered_graph.op_seqs().iterate(
- [&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) {
- const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
- assert(lower_info != nullptr);
- // For now, the only acl_cl supports fully fp16
- if (lower_info->backend()->config()->id() != kAclClBackendConfigId)
- return;
-
- // Convert input,output operands' type to fp16
- convertOperandsOfOpSequence(op_seq);
- });
+ [&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) {
+ const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
+ assert(lower_info != nullptr);
+ // For now, only acl_cl fully supports fp16
+ if (lower_info->backend()->config()->id() != kAclClBackendConfigId)
+ return;
+
+ // Convert input/output operands' types to fp16
+ convertOperandsOfOpSequence(op_seq);
+ });
}
void Fp32ToFp16Converter::convertOperandsOfOpSequence(ir::OpSequence &op_seq)
@@ -391,10 +393,10 @@ void Fp32ToFp16Converter::convertOperandsOfOpSequence(ir::OpSequence &op_seq)
const auto &op_seq_inputs = _lowered_graph.graph().getInputs();
const auto &op_seq_outputs = _lowered_graph.graph().getOutputs();
- for (auto &op_idx : op_seq)
+ for (const auto &op_idx : op_seq)
{
const auto &node = operations.at(op_idx);
- for (auto &ind : node.getInputs() | ir::Remove::UNDEFINED)
+ for (const auto &ind : node.getInputs() | ir::Remove::UNDEFINED)
{
if (node.opcode() == ir::OpCode::ConvertFp32ToFp16 || op_seq_inputs.contains(ind))
continue;
@@ -405,10 +407,10 @@ void Fp32ToFp16Converter::convertOperandsOfOpSequence(ir::OpSequence &op_seq)
obj.type(ir::DataType::FLOAT16);
- VERBOSE(Fp32ToFp16Converter) << "Input Operand #" << ind.value() << ": fp16" << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "Input Operand " << ind << ": fp16" << std::endl;
}
- for (auto &ind : node.getOutputs())
+ for (const auto &ind : node.getOutputs())
{
if (node.opcode() == ir::OpCode::ConvertFp16ToFp32 || op_seq_outputs.contains(ind))
continue;
@@ -419,7 +421,7 @@ void Fp32ToFp16Converter::convertOperandsOfOpSequence(ir::OpSequence &op_seq)
obj.type(ir::DataType::FLOAT16);
- VERBOSE(Fp32ToFp16Converter) << "Output Operand #" << ind.value() << ": fp16" << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "Output Operand " << ind << ": fp16" << std::endl;
}
}
}
@@ -444,7 +446,7 @@ void Fp32ToFp16Converter::convertDatas()
obj.data(std::move(new_data));
obj.type(ir::DataType::FLOAT16);
- VERBOSE(Fp32ToFp16Converter) << "Constant Operand #" << ind.value() << ": fp16" << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "Constant Operand " << ind << ": fp16" << std::endl;
}
});
}
@@ -513,23 +515,23 @@ ir::OperandIndex Fp32ToFp16Converter::newCopiedOperand(const ir::OperandIndex &o
void Fp32ToFp16Converter::setNewOperandLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
const ir::OperandIndex &new_op_ind)
{
- const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
+ const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
assert(lower_info != nullptr);
- auto new_lower_info = std::make_unique<ir::operand::LowerInfo>();
- auto permute_factor = ir::operand::PermuteFactor(lower_info->backend(), lower_info->layout());
+ auto new_lower_info = std::make_unique<compiler::OperandLowerInfo>();
+ auto permute_factor = compiler::PermuteFactor(lower_info->backend(), lower_info->layout());
new_lower_info->addDefPermuteFactor(permute_factor);
new_lower_info->addUsePermuteFactor(permute_factor);
_lowered_graph.setLowerInfo(new_op_ind, std::move(new_lower_info));
}
-void Fp32ToFp16Converter::setNewOpSequenceLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
- const ir::OpSequenceIndex &new_op_seq_ind)
+void Fp32ToFp16Converter::setNewOperationLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
+ const ir::OpSequenceIndex &new_op_seq_ind)
{
- const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
+ const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
assert(lower_info != nullptr);
auto new_lower_info =
- std::make_unique<ir::operation::LowerInfo>(lower_info->backend(), lower_info->layout());
+ std::make_unique<compiler::OperationLowerInfo>(lower_info->backend(), lower_info->layout());
_lowered_graph.setLowerInfo(new_op_seq_ind, std::move(new_lower_info));
}
@@ -600,7 +602,7 @@ Fp32ToFp16Converter::newOperationConvertFp32ToFp16(const ir::OperandIndex &op_se
auto &new_op_obj = operands.at(new_op_ind);
std::unique_ptr<ir::Operation> new_node(
- new ir::operation::ConvertFp32ToFp16({op_seq_input_ind}, {new_op_ind}));
+ new ir::operation::ConvertFp32ToFp16({op_seq_input_ind}, {new_op_ind}));
const auto new_node_ind = operations.push(std::move(new_node));
input_obj.insertUse(new_node_ind);
@@ -620,7 +622,7 @@ Fp32ToFp16Converter::newOperationConvertFp16ToFp32(const ir::OperandIndex &op_se
auto &new_op_obj = operands.at(new_op_ind);
std::unique_ptr<ir::Operation> new_node(
- new ir::operation::ConvertFp16ToFp32({new_op_ind}, {op_seq_output_ind}));
+ new ir::operation::ConvertFp16ToFp32({new_op_ind}, {op_seq_output_ind}));
const auto new_node_ind = operations.push(std::move(new_node));
new_op_obj.insertUse(new_node_ind);
@@ -633,7 +635,7 @@ ir::OpSequenceIndex Fp32ToFp16Converter::newOpSequence(const ir::OpSequenceIndex
const ir::OperationIndex &node_index)
{
auto &node = _lowered_graph.graph().operations().at(node_index);
- const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
+ const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
assert(lower_info != nullptr);
auto layout = lower_info->layout();
@@ -745,7 +747,7 @@ Fp32ToFp16Converter::findOpSequencesContiguous(const InputToOpSeqs &input_to_op_
// | |
// [OPERATION] [OPERATION]
//
- for (auto &op_seq_ind : found_input_in_op_seqs->second)
+ for (const auto &op_seq_ind : found_input_in_op_seqs->second)
{
auto found_in_fp32_to_fp16 = _list_fp32_to_fp16.find(op_seq_ind);
if (found_in_fp32_to_fp16 != _list_fp32_to_fp16.end())
@@ -759,9 +761,8 @@ Fp32ToFp16Converter::findOpSequencesContiguous(const InputToOpSeqs &input_to_op_
opseq_map_to_delete[op_seq_ind_fp16_to_fp32].insert(op_seq_ind);
}
- VERBOSE(Fp32ToFp16Converter)
- << "Contiguous from OpSeq#" << op_seq_ind_fp16_to_fp32.value() << "(ToFp32)"
- << " to OpSeq#" << op_seq_ind.value() << "(ToFp16)" << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "Contiguous from " << op_seq_ind_fp16_to_fp32 << "(ToFp32)"
+ << " to " << op_seq_ind << "(ToFp16)" << std::endl;
}
}
}
@@ -775,7 +776,7 @@ Fp32ToFp16Converter::InputToOpSeqs Fp32ToFp16Converter::prepareInputToOpSeqs() c
InputToOpSeqs input_to_op_seqs;
op_seqs.iterate([&](const ir::OpSequenceIndex &op_seq_idx, const ir::OpSequence &op_seq) {
- for (auto input : op_seq.getInputs() | ir::Remove::UNDEFINED)
+ for (auto &&input : op_seq.getInputs() | ir::Remove::UNDEFINED)
{
auto it = input_to_op_seqs.find(input);
if (it == input_to_op_seqs.end())
@@ -798,13 +799,13 @@ Fp32ToFp16Converter::getListOpSequences(const OpSeqIndexToOpSeqIndexList &opseq_
OpSeqIndexList list;
for (const auto &it : opseq_map_to_delete)
{
- auto &opseq_ind_fp16_to_fp32 = it.first;
+ const auto &opseq_ind_fp16_to_fp32 = it.first;
if (list.find(opseq_ind_fp16_to_fp32) == list.end())
{
list.emplace(opseq_ind_fp16_to_fp32);
}
- for (auto &opseq_ind_fp32_to_fp16 : it.second)
+ for (const auto &opseq_ind_fp32_to_fp16 : it.second)
{
if (list.find(opseq_ind_fp32_to_fp16) == list.end())
{
@@ -842,7 +843,7 @@ Fp32ToFp16Converter::findOperationsToDelete(const OpSeqIndexList &list_to_delete
}
void Fp32ToFp16Converter::manipulateContiguousOpSequences(
- const InputToOpSeqs &input_to_op_seqs, const OpSeqIndexToOpSeqIndexList &opseq_map_to_delete)
+ const InputToOpSeqs &input_to_op_seqs, const OpSeqIndexToOpSeqIndexList &opseq_map_to_delete)
{
auto &op_seqs = _lowered_graph.op_seqs();
@@ -861,14 +862,14 @@ void Fp32ToFp16Converter::manipulateContiguousOpSequences(
// |
// [OPERATION] // op_seq_ind_next_to_fp16
//
- for (auto it : opseq_map_to_delete)
+ for (auto &&it : opseq_map_to_delete)
{
// fp16_to_fp32's input/output num is always 1
auto &op_seq_ind_fp16_to_fp32 = it.first;
auto &op_seq_fp16_to_fp32 = op_seqs.at(op_seq_ind_fp16_to_fp32);
auto &input_ind_fp16_to_fp32 = op_seq_fp16_to_fp32.getInputs().at(0);
- for (auto &op_seq_ind_fp32_to_fp16 : it.second)
+ for (const auto &op_seq_ind_fp32_to_fp16 : it.second)
{
auto &op_seq_fp32_to_fp16 = op_seqs.at(op_seq_ind_fp32_to_fp16);
assert(op_seq_fp32_to_fp16.size() == 1);
@@ -878,7 +879,7 @@ void Fp32ToFp16Converter::manipulateContiguousOpSequences(
auto found_next_to_fp16 = input_to_op_seqs.find(output_ind_fp32_to_fp16);
assert(found_next_to_fp16 != input_to_op_seqs.end());
- for (auto &op_seq_ind_next_to_fp16 : found_next_to_fp16->second)
+ for (const auto &op_seq_ind_next_to_fp16 : found_next_to_fp16->second)
{
manipulateInput(op_seq_ind_next_to_fp16, output_ind_fp32_to_fp16, input_ind_fp16_to_fp32);
}
@@ -894,61 +895,62 @@ void Fp32ToFp16Converter::manipulateContiguousOpSequences(
}
void Fp32ToFp16Converter::deleteContiguousOpSequences(
- const OpSeqIndexList &list_to_delete_op_seqs,
- const ir::OperandIndexSequence &list_to_delete_ops)
+ const OpSeqIndexList &list_to_delete_op_seqs, const ir::OperandIndexSequence &list_to_delete_ops)
{
auto &operands = _lowered_graph.graph().operands();
auto &operations = _lowered_graph.graph().operations();
auto &op_seqs = _lowered_graph.op_seqs();
- for (auto &op_seq_ind : list_to_delete_op_seqs)
+ for (const auto &op_seq_ind : list_to_delete_op_seqs)
{
auto &op_seq = op_seqs.at(op_seq_ind);
assert(op_seq.size() == 1);
- VERBOSE(Fp32ToFp16Converter) << "Delete OpSeq #" << op_seq_ind.value() << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "Delete OpSeq " << op_seq_ind << std::endl;
auto &first_node_ind = op_seq.operations().at(0);
auto &first_node = operations.at(first_node_ind);
assert(first_node.opcode() == ir::OpCode::ConvertFp32ToFp16 ||
first_node.opcode() == ir::OpCode::ConvertFp16ToFp32);
- VERBOSE(Fp32ToFp16Converter) << "Delete Node #" << first_node_ind.value() << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "Delete Node " << first_node_ind << std::endl;
// Uses
- for (auto &ind : first_node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
+ for (const auto &ind : first_node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
auto &obj = operands.at(ind);
obj.removeUse(first_node_ind);
- VERBOSE(Fp32ToFp16Converter) << "Operand #" << ind.value() << "'s Use(Node#"
- << first_node_ind.value() << ") is removed" << std::endl;
+ VERBOSE(Fp32ToFp16Converter)
+ << "Operand " << ind << "'s Use(Node" << first_node_ind << ") is removed" << std::endl;
}
// Def
- for (auto &ind : first_node.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
+ for (const auto &ind : first_node.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
auto &obj = operands.at(ind);
assert(obj.getDef() == first_node_ind);
obj.unsetDef();
- VERBOSE(Fp32ToFp16Converter) << "Operand #" << ind.value() << "'s Def(Node#"
- << first_node_ind.value() << ") is removed" << std::endl;
+ VERBOSE(Fp32ToFp16Converter)
+ << "Operand " << ind << "'s Def(Node" << first_node_ind << ") is removed" << std::endl;
}
// Operation
operations.remove(first_node_ind);
- VERBOSE(Fp32ToFp16Converter) << "Node#" << first_node_ind.value() << " is removed" << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "Node" << first_node_ind << " is removed" << std::endl;
// OpSequence
op_seqs.remove(op_seq_ind);
- VERBOSE(Fp32ToFp16Converter) << "OpSeq#" << op_seq_ind.value() << " is removed" << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "OpSeq" << op_seq_ind << " is removed" << std::endl;
}
// Operand
- for (auto &ind : list_to_delete_ops)
+ for (const auto &ind : list_to_delete_ops)
{
operands.remove(ind);
- VERBOSE(Fp32ToFp16Converter) << "Operand #" << ind.value() << " is removed" << std::endl;
+ VERBOSE(Fp32ToFp16Converter) << "Operand " << ind << " is removed" << std::endl;
}
}
} // namespace compiler
} // namespace onert
+
+#endif
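
Note on the logging changes in this file: the VERBOSE() calls stop extracting .value() and stream the index objects directly, which presumes the strongly-typed index classes provide a stream-insertion operator. A minimal sketch of what such an operator could look like follows; the struct and operator are illustrative stand-ins, not onert's actual ir::OperandIndex implementation.

#include <cstdint>
#include <ostream>

// Illustrative stand-in for a strongly-typed index such as ir::OperandIndex
struct IndexLike
{
  uint32_t _value;
  uint32_t value() const { return _value; }
};

// With an operator like this, `os << ind` replaces the older `os << ind.value()`
// and keeps any prefix formatting in one place.
inline std::ostream &operator<<(std::ostream &os, const IndexLike &index)
{
  return os << index.value();
}
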
diff --git a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.h b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.h
index eeecb9846..87751ceb4 100644
--- a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.h
+++ b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.h
@@ -14,6 +14,8 @@
* limitations under the License.
*/
+#if 0 // This file is temporarily unused
+
#ifndef __ONERT_COMPILER_FP32_TO_FP16_CONVERTER_H__
#define __ONERT_COMPILER_FP32_TO_FP16_CONVERTER_H__
@@ -64,8 +66,8 @@ private:
void setNewOperandLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
const ir::OperandIndex &new_op_ind);
- void setNewOpSequenceLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
- const ir::OpSequenceIndex &new_op_seq_ind);
+ void setNewOperationLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
+ const ir::OpSequenceIndex &new_op_seq_ind);
void manipulateInput(const ir::OpSequenceIndex &op_seq_ind,
const ir::OperandIndex &op_seq_input_ind,
@@ -99,3 +101,5 @@ private:
} // namespace onert
#endif // __ONERT_COMPILER_FP32_TO_FP16_CONVERTER_H__
+
+#endif
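
Both converter files above are wrapped in an #if 0 / #endif pair rather than deleted. As a generic sketch of the pattern (not the converter itself): everything between the guards is stripped by the preprocessor, so the translation unit still compiles, to nothing, and can be restored later by flipping the condition. Unlike commenting the file out, #if 0 also tolerates nested /* */ block comments.

#if 0 // disabled: nothing between the guards reaches the compiler
void temporarilyUnusedHelper() { /* ... */ }
#endif
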
diff --git a/runtime/onert/core/src/compiler/HEScheduler.cc b/runtime/onert/core/src/compiler/HEScheduler.cc
index 5653b090e..56e2208d6 100644
--- a/runtime/onert/core/src/compiler/HEScheduler.cc
+++ b/runtime/onert/core/src/compiler/HEScheduler.cc
@@ -14,34 +14,32 @@
* limitations under the License.
*/
-#include "ir/Operand.h"
-#include "compiler/HEScheduler.h"
-#include "ir/Graph.h"
-#include "util/ConfigSource.h"
+#include "HEScheduler.h"
+
#include "compiler/BackendResolver.h"
+#include "ir/Graph.h"
#include "util/logging.h"
-#include "util/Utils.h"
-#include "exec/FunctionSequence.h"
+
#include <cassert>
#include <cmath>
-#include <chrono>
-namespace onert
+namespace
{
-namespace compiler
-{
-static uint32_t getOperationsFlattenedIOSize(const ir::Graph &graph, const ir::Operation &node)
+using namespace onert;
+
+uint32_t getOperationsFlattenedIOSize(const ir::Graph &graph, const ir::IOperation &node)
{
uint32_t size = 0;
- for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
+ for (const auto &ind :
+ (node.getInputs() | ir::Remove::UNDEFINED) + (node.getOutputs() | ir::Remove::UNDEFINED))
{
size += graph.operands().at(ind).info().total_size();
}
return size;
}
-static bool isQuant(const ir::Graph &graph, const ir::Operation &node)
+bool isQuant(const ir::Graph &graph, const ir::IOperation &node)
{
for (const auto &input : node.getInputs() | ir::Remove::UNDEFINED)
{
@@ -54,18 +52,17 @@ static bool isQuant(const ir::Graph &graph, const ir::Operation &node)
return false;
}
-static bool isWorkaroundSkip(const ir::Graph &, const backend::Backend *, const ir::Operation &,
- bool)
+bool isWorkaroundSkip(const ir::Graph &, const backend::Backend *, const ir::IOperation &, bool)
{
// Now, there is no workaround
return false;
}
// if a node can be merged into op_seq
-static bool isMergeable(const ir::Graph &graph, const ir::Operation &node)
+bool isMergeable(const ir::Graph &graph, const ir::IOperation &node)
{
size_t prev_op_cnt = 0;
- for (const auto &input : node.getInputs())
+ for (const auto &input : node.getInputs() | ir::Remove::UNDEFINED)
{
// only valid_inputs
const auto &operand = graph.operands().at(input);
@@ -85,15 +82,23 @@ static bool isMergeable(const ir::Graph &graph, const ir::Operation &node)
return true;
}
+} // namespace
+
+namespace onert
+{
+
+namespace compiler
+{
+
void HEScheduler::scheduleShufflingBackends()
{
VERBOSE(HEScheduler::schedule)
- << "Started task scheduling: uses all backends to get more metrics for data transfer"
- << std::endl;
+ << "Started task scheduling: uses all backends to get more metrics for data transfer"
+ << std::endl;
size_t backend_ind = 0;
for (const auto &rank : _rank_to_op)
{
- VERBOSE(HEScheduler::schedule) << "scheduling (" << rank.second.value() << ")" << std::endl;
+ VERBOSE(HEScheduler::schedule) << "scheduling (" << rank.second << ")" << std::endl;
const auto &node = _graph->operations().at(rank.second);
const bool quant = isQuant(*_graph, node);
const auto size = getOperationsFlattenedIOSize(*_graph, node);
@@ -115,7 +120,7 @@ void HEScheduler::scheduleShufflingBackends()
continue;
}
const auto exec_time =
- _exec_time->getOperationExecTime(_all_backends[backend_ind], node.name(), quant, size);
+ _exec_time->getOperationExecTime(_all_backends[backend_ind], node.name(), quant, size);
// Scheduling to measure data transfer must be done after measuring all backends separately
assert(exec_time != _exec_time->NOT_FOUND);
if (exec_time == _exec_time->getMax())
@@ -132,7 +137,7 @@ void HEScheduler::scheduleShufflingBackends()
}
}
-bool HEScheduler::isNodeProfiled(const ir::Operation &node)
+bool HEScheduler::isNodeProfiled(const ir::IOperation &node)
{
const bool quant = isQuant(*_graph, node);
const auto size = getOperationsFlattenedIOSize(*_graph, node);
@@ -202,7 +207,7 @@ std::unique_ptr<compiler::BackendResolver> HEScheduler::schedule(const ir::Graph
{
// Check if profiling info about all backend/node pairs already exists
bool all_nodes_are_profiled = true;
- _graph->operations().iterate([&](const ir::OperationIndex &, const ir::Operation &op) {
+ _graph->operations().iterate([&](const ir::OperationIndex &, const ir::IOperation &op) {
if (all_nodes_are_profiled)
all_nodes_are_profiled = isNodeProfiled(op);
});
@@ -219,7 +224,7 @@ std::unique_ptr<compiler::BackendResolver> HEScheduler::schedule(const ir::Graph
ir::OperationIndexMap<bool> visited;
graph.operations().iterate(
- [&](const ir::OperationIndex &index, const ir::Operation &) { visited[index] = false; });
+ [&](const ir::OperationIndex &index, const ir::IOperation &) { visited[index] = false; });
// for each task select the backend with the smallest earliest finishing time(eft)
for (const auto &rank : _rank_to_op)
{
@@ -248,19 +253,20 @@ int64_t HEScheduler::getPermuteTime(const backend::Backend *src_backend,
if (time != _exec_time->NOT_FOUND)
return time;
+  // FIXME Permute time is not recorded, so control always reaches this point
// Makes the scheduler prefer keeping computations on one backend
- return size / 200;
+ return size / 400;
}
-int64_t HEScheduler::tryBackend(const ir::Operation &node, const backend::Backend *backend)
+int64_t HEScheduler::tryBackend(const ir::IOperation &node, const backend::Backend *backend)
{
// if there is no profiling info don't use this backend during scheduling
if (!_is_profiling_mode)
{
VERBOSE(HEScheduler::tryBackend)
- << "Trying to HE schedule while there is no profiling info for " << node.name()
- << " on backend " << backend->config()->id() << ". So this backend won't be used. "
- << std::endl;
+ << "Trying to HE schedule while there is no profiling info for " << node.name()
+ << " on backend " << backend->config()->id() << ". So this backend won't be used. "
+ << std::endl;
_is_supported[backend][node.name()] = false;
return _exec_time->getMax();
}
@@ -291,10 +297,10 @@ void HEScheduler::makeRank()
VERBOSE(HEScheduler::makeRank) << "task prioritizing" << std::endl;
_graph->operations().iterate(
- [&](const ir::OperationIndex &index, const ir::Operation &) { DFSMaxRank(index); });
+ [&](const ir::OperationIndex &index, const ir::IOperation &) { DFSMaxRank(index); });
// Check that ranks are calculated for all operations(nodes)
- _graph->operations().iterate([&](const ir::OperationIndex &index, const ir::Operation &) {
+ _graph->operations().iterate([&](const ir::OperationIndex &index, const ir::IOperation &) {
UNUSED_RELEASE(index);
assert(_op_to_rank->find(index) != _op_to_rank->end());
});
@@ -360,8 +366,8 @@ int64_t HEScheduler::DFSMaxRank(const ir::OperationIndex &index)
assert(rank >= 0);
_rank_to_op.emplace(rank, index);
_op_to_rank->emplace(index, rank);
- VERBOSE(HEScheduler::DFSMaxRank) << "rank of operation (" << index.value() << ")" << node.name()
- << " is " << rank << std::endl;
+ VERBOSE(HEScheduler::DFSMaxRank)
+ << "rank of operation (" << index << ")" << node.name() << " is " << rank << std::endl;
return rank;
}
@@ -370,7 +376,7 @@ int64_t HEScheduler::DFSChildrenMaxRank(const ir::OperationIndex &index)
{
const auto &node = _graph->operations().at(index);
int64_t max_child_rank = 0;
- for (const auto &output : node.getOutputs())
+ for (const auto &output : node.getOutputs() | ir::Remove::UNDEFINED)
{
const auto &operand = _graph->operands().at(output);
const bool quant = operand.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM;
@@ -384,9 +390,9 @@ int64_t HEScheduler::DFSChildrenMaxRank(const ir::OperationIndex &index)
{
continue;
}
- // TODO Change it to controlflow backend
+ // TODO Change it to builtin backend
auto transfer_cost =
- getPermuteTime(backend, other_backend, quant, operand.info().total_size());
+ getPermuteTime(backend, other_backend, quant, operand.info().total_size());
avg_transfer_cost += transfer_cost;
}
}
@@ -403,7 +409,7 @@ int64_t HEScheduler::DFSChildrenMaxRank(const ir::OperationIndex &index)
int64_t HEScheduler::backendAvailableTime(const backend::Backend *backend,
const int64_t &starting_time, const int64_t &time_amount)
{
- const auto backend_times = _backends_avail_time.at(backend);
+ const auto &backend_times = _backends_avail_time.at(backend);
// finishing and starting times of an op, that will come after current op
auto next_op_fst = backend_times.upper_bound(starting_time);
// finishing time of an op, that will come before current op
@@ -419,7 +425,7 @@ int64_t HEScheduler::backendAvailableTime(const backend::Backend *backend,
bool HEScheduler::schedule(const ir::OperationIndex &index, const backend::Backend *parent_backend)
{
- VERBOSE(HEScheduler::schedule) << "scheduling (" << index.value() << ")" << std::endl;
+ VERBOSE(HEScheduler::schedule) << "scheduling (" << index << ")" << std::endl;
int64_t eft = std::numeric_limits<int64_t>::max(), selected_exec_time = 0;
const auto &node = _graph->operations().at(index);
@@ -506,7 +512,7 @@ HEScheduler::ESTAndExecTime(const backend::Backend *backend, const ir::Operation
// Find free time for data transferring and insert it into backend taskset. This is needed:
// 1. Time for multiple permutations for this node's input is found correctly
// 2. If backend==cpu, then free time for this node must come after permutations
- for (auto &it : transfer_st_exec_time)
+ for (auto &&it : transfer_st_exec_time)
{
if (_is_parallel_exec)
{
@@ -542,27 +548,27 @@ HEScheduler::ESTAndExecTime(const backend::Backend *backend, const ir::Operation
if (!_is_parallel_exec)
{
VERBOSE(HEScheduler::ESTAndExecTime)
- << "exec_time of (" << index.value() << ") " << node.name() << " quant==" << quant << " on "
- << backend->config()->id() << " is " << exec_time
- << " microseconds. Data transfer cost: " << total_transfer_cost << std::endl;
+ << "exec_time of (" << index << ") " << node.name() << " quant==" << quant << " on "
+ << backend->config()->id() << " is " << exec_time
+ << " microseconds. Data transfer cost: " << total_transfer_cost << std::endl;
return {total_transfer_cost, exec_time};
}
VERBOSE(HEScheduler::ESTAndExecTime)
- << "exec_time of (" << index.value() << ") " << node.name() << " quant==" << quant << " on "
- << backend->config()->id() << ": " << exec_time
- << " microseconds. Backend available time: " << prev_op_ft
- << " Parent's max eft: " << max_pred_eft - total_transfer_cost
- << " data transfer cost: " << total_transfer_cost << std::endl;
+ << "exec_time of (" << index << ") " << node.name() << " quant==" << quant << " on "
+ << backend->config()->id() << ": " << exec_time
+ << " microseconds. Backend available time: " << prev_op_ft
+ << " Parent's max eft: " << max_pred_eft - total_transfer_cost
+ << " data transfer cost: " << total_transfer_cost << std::endl;
return {prev_op_ft, exec_time};
}
-int64_t HEScheduler::predMaxEFT(const backend::Backend *backend, const ir::Operation &node,
+int64_t HEScheduler::predMaxEFT(const backend::Backend *backend, const ir::IOperation &node,
std::multimap<int64_t, int64_t> &transfer_st_exec_time)
{
int64_t max_pred_eft = 0;
- for (const auto &input_operand_idx : node.getInputs())
+ for (const auto &input_operand_idx : node.getInputs() | ir::Remove::UNDEFINED)
{
const auto &input_operand = _graph->operands().at(input_operand_idx);
const bool quant = input_operand.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM;
@@ -578,7 +584,7 @@ int64_t HEScheduler::predMaxEFT(const backend::Backend *backend, const ir::Opera
{
// Multiply operand size by 2 because size must describe input+output size
int64_t transfer_cost =
- getPermuteTime(parent_backend, backend, quant, input_operand.info().total_size() * 2);
+ getPermuteTime(parent_backend, backend, quant, input_operand.info().total_size() * 2);
transfer_st_exec_time.emplace(_ops_eft.at(input_node_idx), transfer_cost);
}
}
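
The getPermuteTime() hunk above keeps the same lookup-then-fallback shape while changing the fallback divisor from 200 to 400. A condensed sketch of that shape is shown below for clarity; the body is a paraphrase of the hunk rather than a verbatim copy, and NOT_FOUND is a stand-in for ExecTime::NOT_FOUND (whose actual value is not shown in this patch).

#include <cstdint>

constexpr int64_t NOT_FOUND = -1; // stand-in for ExecTime::NOT_FOUND

int64_t permuteTimeWithFallback(int64_t measured_time, uint32_t size)
{
  // Use the profiled permutation time when one has been recorded
  if (measured_time != NOT_FOUND)
    return measured_time;

  // Otherwise fall back to a synthetic, size-proportional cost
  // (the FIXME above notes that in practice this path is always taken)
  return size / 400;
}
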
diff --git a/runtime/onert/core/src/compiler/HEScheduler.h b/runtime/onert/core/src/compiler/HEScheduler.h
index b9cee5881..df6c07926 100644
--- a/runtime/onert/core/src/compiler/HEScheduler.h
+++ b/runtime/onert/core/src/compiler/HEScheduler.h
@@ -23,14 +23,16 @@
#ifndef __ONERT_COMPILER_H_E_SCHEDULER_H_
#define __ONERT_COMPILER_H_E_SCHEDULER_H_
-#include "compiler/IScheduler.h"
-#include "compiler/BackendManager.h"
-#include "compiler/Compiler.h"
-#include "ir/Graph.h"
-#include "exec/ExecTime.h"
-#include "backend/Backend.h"
-#include <memory>
-#include "ir/OperationIndexMap.h"
+#include "IScheduler.h"
+#include "../backend/builtin/Config.h"
+#include "../exec/ExecTime.h"
+
+#include <backend/Backend.h>
+#include <compiler/BackendManager.h>
+#include <compiler/Compiler.h>
+#include <ir/Graph.h>
+#include <ir/OperationIndexMap.h>
+
#include <map>
#include <memory>
@@ -50,26 +52,26 @@ public:
* @param[in] model Graph model
* @param[in] backend_resolver backend resolver
*/
- HEScheduler(const backend::BackendContexts &backend_contexts, const CompilerOptions &options)
- : _is_supported{}, _backends_avail_time{}, _ops_eft{},
- _op_to_rank{std::make_shared<ir::OperationIndexMap<int64_t>>()},
- _is_profiling_mode{options.he_profiling_mode},
- _is_linear_exec{options.executor == "Linear"},
- _is_parallel_exec{options.executor == "Parallel"}
+ HEScheduler(const std::vector<const backend::Backend *> &backends, const CompilerOptions &options)
+ : _is_supported{}, _backends_avail_time{}, _ops_eft{},
+ _op_to_rank{std::make_shared<ir::OperationIndexMap<int64_t>>()},
+ _is_profiling_mode{options.he_profiling_mode}, _is_linear_exec{options.executor == "Linear"},
+ _is_parallel_exec{options.executor == "Parallel"}
{
- for (auto &entry : backend_contexts)
+ for (auto &&entry : backends)
{
- if (entry.first->config()->id() == backend::controlflow::Config::ID)
+ if (entry->config()->id() == backend::builtin::Config::ID)
continue;
- _all_backends.push_back(entry.first);
+ _all_backends.push_back(entry);
}
_backend_resolver = std::make_unique<compiler::BackendResolver>();
_exec_time = std::make_unique<exec::ExecTime>(_all_backends);
// Find cpu backend
- auto cpu_backend_it = std::find_if(
- _all_backends.begin(), _all_backends.end(),
- [](const backend::Backend *backend) { return backend->config()->id() == "cpu"; });
+ auto cpu_backend_it =
+ std::find_if(_all_backends.begin(), _all_backends.end(), [](const backend::Backend *backend) {
+ return backend->config()->id() == "cpu";
+ });
if (cpu_backend_it == _all_backends.end())
throw std::runtime_error("HEScheduler could be used only if 'cpu' backend is available");
_cpu_backend = *cpu_backend_it;
@@ -86,7 +88,7 @@ public:
std::shared_ptr<ir::OperationIndexMap<int64_t>> getIndexedRanks() { return _op_to_rank; }
private:
- bool isNodeProfiled(const ir::Operation &);
+ bool isNodeProfiled(const ir::IOperation &);
bool schedule(const ir::OperationIndex &, const backend::Backend *parent_backend);
/**
@@ -113,7 +115,7 @@ private:
*
* @return earliest finishing time of parent nodes
*/
- int64_t predMaxEFT(const backend::Backend *backend, const ir::Operation &node,
+ int64_t predMaxEFT(const backend::Backend *backend, const ir::IOperation &node,
std::multimap<int64_t, int64_t> &transfer_st_exec_time);
void makeRank();
@@ -144,7 +146,7 @@ private:
void scheduleShufflingBackends();
- int64_t tryBackend(const ir::Operation &node, const backend::Backend *backend);
+ int64_t tryBackend(const ir::IOperation &node, const backend::Backend *backend);
/**
* @brief Schedule a node and its successor until:
@@ -173,7 +175,7 @@ private:
std::unique_ptr<exec::ExecTime> _exec_time;
const ir::Graph *_graph{nullptr};
std::vector<const backend::Backend *> _all_backends;
- const backend::Backend *_cpu_backend{nullptr}; // TODO Change this to controlflow_backend
+ const backend::Backend *_cpu_backend{nullptr}; // TODO Change this to _builtin_backend
bool _is_profiling_mode;
bool _is_linear_exec;
bool _is_parallel_exec;
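
The constructor now takes a plain list of backends instead of backend::BackendContexts. The call site in the LoweredGraph.cc hunk further down in this patch follows this shape (condensed here for illustration; graph and options stand for the lowered graph and the compiler options at that call site).

// Condensed from the LoweredGraph::lowerGraph() changes later in this patch
auto all_backends = BackendManager::get().getAll();
auto scheduler = HEScheduler(all_backends, options);
auto backend_resolver = scheduler.schedule(graph);
auto indexed_ranks = scheduler.getIndexedRanks();
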
diff --git a/runtime/onert/core/src/compiler/HEScheduler.test.cc b/runtime/onert/core/src/compiler/HEScheduler.test.cc
new file mode 100644
index 000000000..1654bfc8b
--- /dev/null
+++ b/runtime/onert/core/src/compiler/HEScheduler.test.cc
@@ -0,0 +1,572 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "HEScheduler.h"
+#include "../exec/ExecTime.h"
+
+#include <ir/DataType.h>
+#include <ir/InternalType.h>
+#include <ir/Shape.h>
+#include <ir/TypeInfo.h>
+#include <ir/operation/BinaryArithmetic.h>
+#include <ir/operation/FullyConnected.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+using namespace onert;
+using namespace ir;
+using namespace backend;
+using namespace operation;
+using namespace exec;
+
+//
+// Mock backends classes
+//
+
+struct MockConfigCPU : public IConfig
+{
+ std::string id() override { return "cpu"; }
+ bool initialize() override { return true; };
+ bool supportPermutation() override { return false; }
+ Layout supportLayout(const IOperation &, Layout) override { return Layout::UNKNOWN; }
+ bool supportDynamicTensor() override { return false; }
+ bool supportFP16() override { return false; }
+};
+
+class MockBackendContext : public BackendContext
+{
+public:
+ using BackendContext::BackendContext;
+ ITensorRegistry *genTensors() override { return nullptr; }
+ FunctionMap genKernels() override { return {}; }
+};
+
+struct MockBackendCPU : public Backend
+{
+ std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigCPU>(); }
+ std::unique_ptr<BackendContext> newContext(ContextData &&data) const override
+ {
+ return std::make_unique<MockBackendContext>(this, std::move(data), nullptr);
+ }
+};
+
+struct MockConfigGPU : public IConfig
+{
+ std::string id() override { return "gpu"; }
+ bool initialize() override { return true; };
+ bool supportPermutation() override { return false; }
+ ir::Layout supportLayout(const ir::IOperation &, ir::Layout) override
+ {
+ return ir::Layout::UNKNOWN;
+ }
+ bool supportDynamicTensor() override { return false; }
+ bool supportFP16() override { return false; }
+};
+
+struct MockBackendGPU : public Backend
+{
+ std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigGPU>(); }
+ std::unique_ptr<BackendContext> newContext(ContextData &&data) const override
+ {
+ return std::make_unique<MockBackendContext>(this, std::move(data), nullptr);
+ }
+};
+
+struct MockConfigNPU : public IConfig
+{
+ std::string id() override { return "npu"; }
+ bool initialize() override { return true; };
+ bool supportPermutation() override { return false; }
+ ir::Layout supportLayout(const ir::IOperation &, ir::Layout) override
+ {
+ return ir::Layout::UNKNOWN;
+ }
+ bool supportDynamicTensor() override { return false; }
+ bool supportFP16() override { return false; }
+};
+
+struct MockBackendNPU : public Backend
+{
+ std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigNPU>(); }
+ std::unique_ptr<BackendContext> newContext(ContextData &&data) const override
+ {
+ return std::make_unique<MockBackendContext>(this, std::move(data), nullptr);
+ }
+};
+
+//
+// Constants
+//
+
+const int OPERAND_ELEMS = 268203;
+const int OPERAND_SIZE = OPERAND_ELEMS * 4;
+const int OPERATION_SIZE = OPERAND_SIZE * 3;
+
+const std::string LINEAR("Linear");
+const std::string DATAFLOW("Dataflow");
+const std::string PARALLEL("Parallel");
+
+//
+// Helper functions
+//
+
+// Set executor through environment variable
+void setExecutor(const std::string &executor) { setenv("EXECUTOR", executor.c_str(), true); }
+
+// Set profiling mode through environment variable
+void setProfilingMode(const bool value) { setenv("PROFILING_MODE", value ? "1" : "0", true); }
+
+// Calculate operation size by adding the sizes of all input and output operands
+uint32_t calcOpSize(const std::shared_ptr<Graph> &graph, const OperationIndex &op_idx)
+{
+ uint32_t size = 0;
+ const auto &op = graph->operations().at(op_idx);
+ for (const auto &ind : op.getInputs() + op.getOutputs())
+ size += graph->operands().at(ind).info().total_size();
+ return size;
+}
+
+// Set an operation's execution time. This helper is needed since ExecTime has only the
+// 'updateOperationExecTime' method.
+void setOperationExecTime(ExecTime &et, const Backend *backend, const std::string &operation,
+ bool quant, uint32_t op_size, int64_t time)
+{
+ // You shouldn't set negative time with this method since nnfw JSON deserializer can't read it
+ assert(time > 0);
+ int64_t prev_time = et.getOperationExecTime(backend, operation, quant, op_size);
+ int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? time : 2 * time - prev_time;
+ et.updateOperationExecTime(backend, operation, quant, op_size, time_to_set);
+ assert(et.getOperationExecTime(backend, operation, quant, op_size) == time);
+}
+
+// Set same execution time for all given backends/operations
+void setOperationsExecutionTime(const std::vector<const Backend *> &backends,
+ const std::vector<std::string> &op_names,
+ const std::vector<uint32_t> &op_sizes, int64_t exec_time)
+{
+ assert(op_names.size() == op_sizes.size());
+ ExecTime et(backends);
+ for (int i = 0; i < op_names.size(); ++i)
+ {
+ for (const auto backend : backends)
+ setOperationExecTime(et, backend, op_names[i], false, op_sizes[i], exec_time);
+ }
+ et.storeOperationsExecTime();
+}
+
+// Set the permute time from one backend to another. This helper is needed since ExecTime has only
+// the 'updatePermuteTime' method.
+void setPermutationTime(ExecTime &et, const Backend *from_backend, const Backend *to_backend,
+ bool quant, uint32_t op_size, int64_t time)
+{
+ // You shouldn't set negative time with this method since nnfw JSON deserializer can't read it
+ assert(time > 0);
+ int64_t prev_time = et.getPermuteTime(from_backend, to_backend, quant, op_size);
+ int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? time : 2 * time - prev_time;
+ et.updatePermuteTime(from_backend, to_backend, quant, op_size, time_to_set);
+ assert(et.getPermuteTime(from_backend, to_backend, quant, op_size) == time);
+}
+
+// Set same permutation time between all given backends
+void setPermutationsExecutionTime(const std::vector<const Backend *> &backends,
+ const int operand_size, const int64_t exec_time)
+{
+ ExecTime et(backends);
+ for (const auto &backend : backends)
+ {
+ for (const auto other_backend : backends)
+ {
+ if (backend == other_backend)
+ continue;
+ setPermutationTime(et, backend, other_backend, false, operand_size, exec_time);
+ }
+ }
+ et.storeOperationsExecTime();
+}
+
+//
+// Functions for creating graphs
+//
+
+using OIS = OperandIndexSequence;
+
+template <typename NodeT, typename... Types>
+OperationIndex create(std::shared_ptr<Graph> graph, Types &&... args)
+{
+ auto op = std::make_unique<NodeT>(std::forward<Types>(args)...);
+ auto op_idx = graph->addOperation(std::move(op));
+  // For now, all operations in the tested graphs have the same size (for simplicity)
+ assert(calcOpSize(graph, op_idx) == OPERATION_SIZE);
+ return op_idx;
+}
+
+// Create straight graph: Add->Sub->Mul
+std::shared_ptr<Graph> createStraightGraph()
+{
+ auto graph = std::make_shared<Graph>();
+ const TypeInfo float_op(DataType::FLOAT32);
+
+ // Create add node
+ auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);
+
+ // Create sub node
+ auto sub_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{add_out_idx, sub_const_idx}, OIS{sub_out_idx}, sub_op_params);
+
+ // Create mul node
+ auto mul_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto mul_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ BinaryArithmetic::Param mul_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{sub_out_idx, mul_const_idx}, OIS{mul_out_idx}, mul_op_params);
+
+ graph->verify();
+ return graph;
+}
+
+/* Create branched graph:
+ * [Add]
+ * // \\
+ *   [Mul1]  [FC1]
+ * || ||
+ * [Mul2] [FC2]
+ * \\ //
+ * [Sub]
+ */
+std::shared_ptr<Graph> createBranchedGraph()
+{
+ auto graph = std::make_shared<Graph>();
+ const TypeInfo float_op(DataType::FLOAT32);
+
+ // Create add node
+ auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);
+
+ // Create mul1 node
+ auto mul1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto mul1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ BinaryArithmetic::Param mul1_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{add_out_idx, mul1_const_idx}, OIS{mul1_out_idx},
+ mul1_op_params);
+
+ // Create mul2 node
+ auto mul2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto mul2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ BinaryArithmetic::Param mul2_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{mul1_out_idx, mul2_const_idx}, OIS{mul2_out_idx},
+ mul2_op_params);
+
+ // Create fc1 node
+ auto fc1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto fc1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ FullyConnected::Param fc1_op_params{Activation::NONE};
+ create<FullyConnected>(graph, OIS{add_out_idx, fc1_const_idx}, OIS{fc1_out_idx}, fc1_op_params);
+
+ // Create fc2 node
+ auto fc2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto fc2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ FullyConnected::Param fc2_op_params{Activation::NONE};
+ create<FullyConnected>(graph, OIS{fc1_out_idx, fc2_const_idx}, OIS{fc2_out_idx}, fc2_op_params);
+
+ // Create sub node
+ auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{mul2_out_idx, fc2_out_idx}, OIS{sub_out_idx}, sub_op_params);
+
+ graph->verify();
+ return graph;
+}
+
+//
+// Tests setup/teardown
+//
+
+// SetUp/TearDown methods run before/after each test and perform actions common to every test
+class HESchedulerTest : public ::testing::Test
+{
+protected:
+ void SetUp() override
+ {
+ // Initialize mock backends
+ _cpu_backend = new MockBackendCPU();
+ _gpu_backend = new MockBackendGPU();
+ _npu_backend = new MockBackendNPU();
+ _mock_backends = {_cpu_backend, _gpu_backend, _npu_backend};
+
+ // Remove previous profile data if it exists
+    if (remove("exec_time.json") != 0)
+    {
+      // DO NOTHING (there was no profile data to remove)
+ }
+
+ // Remember original value of 'EXECUTOR' environment variable
+ char *executor = std::getenv("EXECUTOR");
+ _original_executor = executor == nullptr ? "" : executor;
+
+ // Remember original value of 'PROFILING_MODE' environment variable
+ char *profiling_mode = std::getenv("PROFILING_MODE");
+ _original_profiling_mode = profiling_mode == nullptr ? "" : profiling_mode;
+ }
+
+ void TearDown() override
+ {
+ delete _cpu_backend;
+ delete _gpu_backend;
+ delete _npu_backend;
+ EXPECT_EQ(remove("exec_time.json"), 0);
+ setenv("EXECUTOR", _original_executor.c_str(), true);
+ setenv("PROFILING_MODE", _original_profiling_mode.c_str(), true);
+ }
+
+ const MockBackendCPU *_cpu_backend{nullptr};
+ const MockBackendGPU *_gpu_backend{nullptr};
+ const MockBackendNPU *_npu_backend{nullptr};
+ std::vector<const Backend *> _mock_backends;
+
+ std::string _original_executor;
+ std::string _original_profiling_mode;
+};
+
+//
+// HEScheduler tests
+//
+
+class HESchedulerTestWithExecutorParam : public HESchedulerTest,
+ public testing::WithParamInterface<std::string>
+{
+};
+
+// HESchedulerTestWithExecutorParam tests are parameterized with the executor name and run three
+// times - once for each executor
+INSTANTIATE_TEST_SUITE_P(AllExecutors, HESchedulerTestWithExecutorParam,
+ testing::Values(LINEAR, DATAFLOW, PARALLEL));
+
+// Test scheduler behavior for a straight graph with known execution times of all nodes and permutes.
+TEST_P(HESchedulerTestWithExecutorParam, straight_graph_known_exec_time)
+{
+ setExecutor(GetParam());
+
+ // Prepare graph
+ ir::Model model;
+ auto graph(createStraightGraph());
+ model.push(ir::SubgraphIndex{0}, graph);
+ OperationIndex add_op_idx(0), sub_op_idx(1), mul_op_idx(2);
+
+ // Set default execution and transfer time
+ setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1);
+ setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul"},
+ {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4);
+
+ // Test 1
+ // Expected behaviour: scheduler assigns different backend to each node
+ {
+ // For each backend reduce execution time of one node
+ ExecTime et(_mock_backends);
+ setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, 1);
+ setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, 1);
+ setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, 1);
+ et.storeOperationsExecTime();
+
+ // Test scheduler
+ auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
+ auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
+ const auto br = scheduler.schedule(*graph);
+ ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu");
+ ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "gpu");
+ ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "npu");
+ }
+
+ // Test 2
+ // Expected behaviour: scheduler assigns single backend to all nodes because of big transfer time
+ {
+ // Increase transfer time
+ setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1e5);
+
+ // Test scheduler
+ auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
+ auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
+ const auto br = scheduler.schedule(*graph);
+ ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu");
+ ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu");
+ ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "cpu");
+ }
+}
+
+// Test scheduler behavior for branched graph with known execution time of all nodes and permutes
+TEST_P(HESchedulerTestWithExecutorParam, branched_graph_known_exec_time)
+{
+ const int64_t NPU_ET = 5000;
+ setExecutor(GetParam());
+
+ // Prepare graph
+ ir::Model model;
+ auto graph(createBranchedGraph());
+ model.push(ir::SubgraphIndex{0}, graph);
+ OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
+ sub_op_idx(5);
+
+ // Set default execution and transfer time
+ setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1000);
+ setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul", "FullyConnected"},
+ {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4);
+
+ // Test 1
+  // Expected behaviour: for the dataflow and linear executors the scheduler assigns the fastest
+  // backend to all nodes; for the parallel executor it assigns different backends to the branches.
+ {
+ // Reduce execution time
+ ExecTime et(_mock_backends);
+ setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, NPU_ET);
+ setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, NPU_ET);
+ setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, NPU_ET);
+ setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET);
+ setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET + 1000);
+ setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET + 1000);
+ et.storeOperationsExecTime();
+
+ // Test scheduler
+ auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
+ auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
+ const auto br = scheduler.schedule(*graph);
+
+ std::string branch1_expected_backend("npu"), branch2_expected_backend("npu");
+ if (GetParam() == PARALLEL)
+ {
+ branch1_expected_backend =
+ br->getBackend(mul1_op_idx)->config()->id() == "npu" ? "npu" : "gpu";
+ branch2_expected_backend = branch1_expected_backend == "npu" ? "gpu" : "npu";
+ }
+
+ ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), branch1_expected_backend);
+ ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), branch1_expected_backend);
+ ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), branch2_expected_backend);
+ ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), branch2_expected_backend);
+ ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu");
+ }
+
+ // Test 2
+ // Expected behaviour: scheduler assigns single backend to all nodes
+ {
+ // Increase execution time for GPU backend
+ ExecTime et(_mock_backends);
+    /* For the parallel executor: set a time larger than sum_of_other_branches_nodes_cnt *
+     * npu_exec_time so that npu is preferred: the i-th branch will wait for npu until it finishes
+     * the nodes of branches [0;i-1] in DFS order. In each branch it goes deep until it encounters
+     * branching or the scheduler assigns another backend to a node. */
+ setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET * 3 + 1);
+ setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET * 3 + 1);
+ et.storeOperationsExecTime();
+
+ // Test scheduler
+ auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
+ auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
+ const auto br = scheduler.schedule(*graph);
+ ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu");
+ }
+}
+
+// Test scheduler behavior for branched graph and enabled profiling mode
+TEST_F(HESchedulerTest, branched_graph_profiling_mode)
+{
+ const int ET = 1e5;
+
+ // Turn on profiling mode
+ setProfilingMode(true);
+ setExecutor(DATAFLOW);
+
+ // Prepare graph
+ ir::Model model;
+ auto graph(createBranchedGraph());
+ model.push(ir::SubgraphIndex{0}, graph);
+ OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
+ sub_op_idx(5);
+
+ // Test 1
+ // Expected behaviour: scheduler assigns backends to nodes with unknown execution time
+ {
+ // Set execution time for all backends/nodes except for cpu/Sub, npu/Mul, gpu/FC
+ ExecTime et(_mock_backends);
+ setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _cpu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
+ setOperationExecTime(et, _cpu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _gpu_backend, "Add", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
+ setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, ET);
+ et.storeOperationsExecTime();
+
+ // Test scheduler
+ auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
+ auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
+ const auto br = scheduler.schedule(*graph);
+ ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "gpu");
+ ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "gpu");
+ ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu");
+ }
+
+ // Test 2
+  // Expected behaviour: the scheduler shuffles backends, so different backends are assigned to
+  // neighboring nodes
+ {
+    // Set execution time for the remaining backends/nodes (cpu/Sub, npu/Mul, gpu/FC)
+ ExecTime et(_mock_backends);
+ setOperationExecTime(et, _cpu_backend, "Sub", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
+ setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
+ et.storeOperationsExecTime();
+
+ // Test scheduler
+ auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
+ auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
+ const auto br = scheduler.schedule(*graph);
+ ASSERT_NE(br->getBackend(add_op_idx)->config()->id(),
+ br->getBackend(mul1_op_idx)->config()->id());
+ ASSERT_NE(br->getBackend(add_op_idx)->config()->id(),
+ br->getBackend(fc1_op_idx)->config()->id());
+ ASSERT_NE(br->getBackend(mul1_op_idx)->config()->id(),
+ br->getBackend(mul2_op_idx)->config()->id());
+ ASSERT_NE(br->getBackend(fc1_op_idx)->config()->id(),
+ br->getBackend(fc2_op_idx)->config()->id());
+ ASSERT_NE(br->getBackend(mul2_op_idx)->config()->id(),
+ br->getBackend(sub_op_idx)->config()->id());
+ ASSERT_NE(br->getBackend(fc2_op_idx)->config()->id(),
+ br->getBackend(sub_op_idx)->config()->id());
+ }
+}
+
+// TODO: Add tests with unknown execution and permutation time
+
+} // unnamed namespace
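
A note on the setOperationExecTime() and setPermutationTime() helpers in this test: they pass 2 * time - prev_time when a previous value exists and then assert that the stored time equals time. That only works out if ExecTime averages the new measurement with the stored one, which is an assumption inferred from how the helpers are written, not verified against ExecTime itself. Under that assumption the arithmetic is:

// stored' = (prev_time + time_to_set) / 2
//         = (prev_time + (2 * time - prev_time)) / 2
//         = (2 * time) / 2
//         = time   // exactly what the assert after the update checks
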
diff --git a/runtime/onert/core/src/compiler/Linear.cc b/runtime/onert/core/src/compiler/Linear.cc
index 49a989500..4dbe229c8 100644
--- a/runtime/onert/core/src/compiler/Linear.cc
+++ b/runtime/onert/core/src/compiler/Linear.cc
@@ -14,207 +14,38 @@
* limitations under the License.
*/
-#include <algorithm>
-
#include "Linear.h"
-#include "backend/IConfig.h"
-#include "backend/IConstantInitializer.h"
-#include "backend/ITensorRegister.h"
-#include "backend/Backend.h"
+#include "../dumper/text/GraphDumper.h"
+
#include "util/logging.h"
+#include <sstream>
+
namespace onert
{
namespace compiler
{
-std::vector<ir::OpSequenceIndex> Linear::linearize(const compiler::LoweredGraph &lowered_graph)
+// TODO(easy) Change the LoweredGraph param to Graph
+std::vector<ir::OperationIndex> Linear::linearize(const compiler::ILoweredGraph &lowered_graph)
{
- std::vector<ir::OpSequenceIndex> order;
- lowered_graph.iterateTopolOpSeqs(
- [&](const ir::OpSequenceIndex &index, const ir::OpSequence &) -> void {
- order.emplace_back(index);
- });
- return order;
+ return lowered_graph.graph().topolSortOperations();
}
-void Linear::dump(const compiler::LoweredGraph &lowered_graph,
- const std::vector<ir::OpSequenceIndex> &order)
+// TODO(easy) Change the LoweredGraph param to Graph
+void Linear::dump(const compiler::ILoweredGraph &lowered_graph,
+ const std::vector<ir::OperationIndex> &order)
{
+ for (const auto &ind : order)
{
- const auto &toString = [](const onert::backend::Backend *backend) {
- assert(backend);
- std::string str;
- str += backend->config()->id();
- return "{" + str + "}";
- };
-
- VERBOSE(Linear) << "Final OpSequence" << std::endl;
- for (const auto index : order)
- {
- const auto &op_seq = lowered_graph.op_seqs().at(index);
- const auto lower_info = lowered_graph.getLowerInfo(index);
- const auto &operations = lowered_graph.graph().operations();
- VERBOSE(Linear) << "* OP_SEQ " << toString(lower_info->backend()) << " "
- << ir::getStrFromOpSeq(op_seq, operations) << std::endl;
- }
+    // TODO Could the logging system handle this? (inserting a prefix for each line)
+ std::istringstream iss{dumper::text::formatOperation(lowered_graph.graph(), ind)};
+ std::string line;
+ while (std::getline(iss, line))
+ VERBOSE(GraphDumper) << line << std::endl;
}
}
-void Linear::planTensors(const compiler::LoweredGraph &lowered_graph,
- const std::vector<ir::OpSequenceIndex> &order)
-{
- const auto &graph = lowered_graph.graph();
- ir::OperandIndexMap<std::shared_ptr<backend::ITensorBuilder>> tensor_builder_map;
-
- ir::OperandIndexMap<uint32_t> uses_map;
- ir::OperandIndexMap<uint32_t> def_map;
- ir::OperandIndexSequence constants;
-
- // Prepare scanning
- graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
- const auto lower_info = lowered_graph.getLowerInfo(ind);
- // TODO Remove if onert doesn't support anymore such as
- // GeneratedTests.reshape_quant8_weights_as_inputs
- if (lower_info->def_factors().size() == 0 && lower_info->use_factors().size() == 0 &&
- !graph.getInputs().contains(ind))
- {
- VERBOSE(LINEAR) << "Operand #" << ind.value() << " will not be used. no more process."
- << std::endl;
- return;
- }
-
- // Unused input of subgraph
- // TODO Register unused input as nullptr in tensor_builder
- if (lower_info->def_factors().size() == 0 && lower_info->use_factors().size() == 0 &&
- graph.getInputs().contains(ind))
- {
- VERBOSE(LINEAR) << "Operand #" << ind.value() << " will not be used. no more process."
- << std::endl;
- return;
- }
-
- uses_map[ind] = obj.getUses().size();
- def_map[ind] = obj.getDef().valid() ? 1 : 0;
-
- bool is_const = obj.isConstant();
- if (is_const)
- {
- constants.append(ind);
- }
-
- auto factor = lower_info->def_factors().getOnlyElement();
- auto backend = factor.backend();
- auto tensor_builder = lowered_graph.backend_contexts().at(backend)->tensor_builder;
- if (!tensor_builder->isRegistered(ind))
- {
- // These tensors do not exist in any op_seq (No use and def)
- const auto info = obj.info();
- const auto backend_layout = factor.layout();
- // TODO Change tensor info to have permuted shape
- tensor_builder->registerTensorInfo(ind, info, backend_layout);
- }
-
- tensor_builder_map[ind] = tensor_builder;
- });
-
- // If a tensor is model output, increase the use of the tensor.
- // This aim is same to above one.
- for (const auto &ind : graph.getOutputs() | ir::Remove::DUPLICATED)
- {
- uses_map[ind]++;
- }
-
- // Start scanning to do notify{First|Last}Use for each tensor
-
- // If a tensor is a constant, increase the use of the tensor.
- // It makes the tensor not be dealloced. It means these will be deallocated last.
- // And allocate constant operands first
- VERBOSE(LINEAR) << "TENSORS as CONSTANT" << std::endl;
- for (const auto &ind : constants)
- {
- uses_map[ind]++;
- tensor_builder_map[ind]->notifyFirstUse(ind);
- }
-
- // Allocate Model's inputs
- VERBOSE(LINEAR) << "TENSORS as MODEL INPUT" << std::endl;
- for (const auto &ind : graph.getInputs() | ir::Remove::DUPLICATED)
- {
- auto tensor_builder = tensor_builder_map[ind];
- if (!tensor_builder) // for GeneratedTests.xxx_weights_as_inputs
- continue;
- tensor_builder->notifyFirstUse(ind);
- }
-
- // At each operation,
- // 1. Scan DEF of outputs. If the DEF, allocate it
- // 2. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
- VERBOSE(LINEAR) << "TENSORS" << std::endl;
- for (const auto op_seq_ind : order)
- {
- const auto &op_seq = lowered_graph.op_seqs().at(op_seq_ind);
- for (const auto &op_idx : op_seq.operations())
- {
- for (const auto &ind : graph.operations().at(op_idx).getOutputs() | ir::Remove::DUPLICATED |
- ir::Remove::UNDEFINED)
- {
- assert(def_map.find(ind) != def_map.end());
- if (def_map[ind])
- {
- def_map[ind] = 0;
- tensor_builder_map[ind]->notifyFirstUse(ind);
- }
- }
-
- for (const auto &ind : graph.operations().at(op_idx).getInputs() | ir::Remove::DUPLICATED |
- ir::Remove::UNDEFINED)
- {
- assert(uses_map.find(ind) != uses_map.end());
- assert(uses_map[ind] > 0);
- uses_map[ind]--;
- if (uses_map[ind] == 0)
- {
- // plan for deallocation of static tensornode
- tensor_builder_map[ind]->notifyLastUse(ind);
-
- // plan for deallocation of dynamic tensor
- auto dyn_tensor_manager = tensor_builder_map[ind]->dynamicTensorManager();
- if (dyn_tensor_manager)
- dyn_tensor_manager->planDealloc(op_idx, ind);
- }
- }
- }
- }
-
- // Dispose and validate
- for (const auto &ind : graph.getOutputs() | ir::Remove::DUPLICATED)
- {
- --uses_map[ind];
- if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
- {
- tensor_builder_map[ind]->notifyLastUse(ind);
- }
- }
-
- for (const auto &ind : constants)
- {
- --uses_map[ind];
- if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
- {
- tensor_builder_map[ind]->notifyLastUse(ind);
- }
- }
-
- assert(
- std::all_of(uses_map.begin(), uses_map.end(),
- [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
-
- assert(
- std::all_of(def_map.begin(), def_map.end(),
- [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
-}
-
} // namespace compiler
} // namespace onert
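
Linear::linearize() now delegates to Graph::topolSortOperations() instead of iterating op sequences. For readers unfamiliar with the term, a generic Kahn-style topological sort is sketched below; it is an illustration of the idea only, using plain integer indices, and is not onert's topolSortOperations() implementation.

#include <cstddef>
#include <queue>
#include <vector>

// Generic Kahn-style topological sort over an adjacency list; integer indices
// stand in for ir::OperationIndex.
std::vector<std::size_t> topoSortSketch(const std::vector<std::vector<std::size_t>> &adj)
{
  // Count incoming edges for every node
  std::vector<std::size_t> indegree(adj.size(), 0);
  for (const auto &succs : adj)
    for (auto v : succs)
      ++indegree[v];

  // Nodes with no predecessors are ready to be emitted
  std::queue<std::size_t> ready;
  for (std::size_t v = 0; v < adj.size(); ++v)
    if (indegree[v] == 0)
      ready.push(v);

  std::vector<std::size_t> order;
  while (!ready.empty())
  {
    auto u = ready.front();
    ready.pop();
    order.push_back(u);
    for (auto v : adj[u])
      if (--indegree[v] == 0)
        ready.push(v);
  }
  return order; // a result shorter than adj.size() would indicate a cycle
}
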
diff --git a/runtime/onert/core/src/compiler/Linear.h b/runtime/onert/core/src/compiler/Linear.h
index 1e24cf92b..4f92dc88d 100644
--- a/runtime/onert/core/src/compiler/Linear.h
+++ b/runtime/onert/core/src/compiler/Linear.h
@@ -20,18 +20,8 @@
#include <vector>
#include <memory>
-#include "ir/OpSequences.h"
#include "ir/Index.h"
-#include "backend/ITensorBuilder.h"
-#include "compiler/LoweredGraph.h"
-
-namespace onert
-{
-namespace ir
-{
-struct OperationVisitor;
-} // namespace ir
-} // namespace onert
+#include "compiler/ILoweredGraph.h"
namespace onert
{
@@ -41,11 +31,9 @@ namespace compiler
class Linear
{
public:
- static std::vector<ir::OpSequenceIndex> linearize(const compiler::LoweredGraph &lowered_graph);
- static void dump(const compiler::LoweredGraph &lowered_graph,
- const std::vector<ir::OpSequenceIndex> &order);
- static void planTensors(const compiler::LoweredGraph &lowered_graph,
- const std::vector<ir::OpSequenceIndex> &order);
+ static std::vector<ir::OperationIndex> linearize(const compiler::ILoweredGraph &lowered_graph);
+ static void dump(const compiler::ILoweredGraph &lowered_graph,
+ const std::vector<ir::OperationIndex> &order);
};
} // namespace compiler
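
Linear::linearize now returns a flat topological order of ir::OperationIndex rather than an OpSequence order. As background only (this is not the onert implementation), such an order over a successor map can be produced with Kahn's algorithm; the sketch below assumes the map contains an entry, possibly empty, for every operation:

#include <cstdint>
#include <queue>
#include <unordered_map>
#include <vector>

using OperationIndex = uint32_t;

// successors: op -> ops that consume one of its outputs
std::vector<OperationIndex>
topologicalOrder(const std::unordered_map<OperationIndex, std::vector<OperationIndex>> &successors)
{
  std::unordered_map<OperationIndex, uint32_t> indegree;
  for (const auto &entry : successors)
  {
    indegree.emplace(entry.first, 0); // ensure every node has an entry
    for (auto succ : entry.second)
      indegree[succ]++;
  }

  std::queue<OperationIndex> ready;
  for (const auto &entry : indegree)
    if (entry.second == 0)
      ready.push(entry.first);

  std::vector<OperationIndex> order;
  while (!ready.empty())
  {
    auto node = ready.front();
    ready.pop();
    order.push_back(node);
    auto it = successors.find(node);
    if (it == successors.end())
      continue;
    for (auto succ : it->second)
      if (--indegree[succ] == 0)
        ready.push(succ);
  }
  return order; // a shorter-than-expected order would indicate a cycle
}
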
diff --git a/runtime/onert/core/src/compiler/LoweredGraph.cc b/runtime/onert/core/src/compiler/LoweredGraph.cc
index 1489a1884..46a45e44a 100644
--- a/runtime/onert/core/src/compiler/LoweredGraph.cc
+++ b/runtime/onert/core/src/compiler/LoweredGraph.cc
@@ -16,21 +16,23 @@
#include "compiler/LoweredGraph.h"
-#include <assert.h>
-#include <sstream>
-#include "util/logging.h"
-#include "compiler/pass/ConstantInsertionPass.h"
-#include "compiler/pass/ConstantLoweringPass.h"
-#include "compiler/pass/PermutationOperationPass.h"
-#include "compiler/pass/PermutationInsertionPass.h"
-#include "compiler/pass/PermutationEliminationPass.h"
-#include "ir/GraphIterator.h"
-#include "ir/verifier/Verifier.h"
+#include "HEScheduler.h"
+#include "ManualScheduler.h"
+#include "pass/ConstantInsertionPass.h"
+#include "pass/ConstantLoweringPass.h"
+#include "pass/PassRunner.h"
+#include "pass/PermutationEliminationPass.h"
+#include "pass/PermutationInsertionPass.h"
+#include "pass/PermutationOperationPass.h"
+#include "../dumper/text/GraphDumper.h"
+#include "../ir/verifier/Verifier.h"
+
#include "backend/Backend.h"
-#include "backend/IConfig.h"
#include "compiler/BackendResolver.h"
-#include "compiler/ManualScheduler.h"
-#include "compiler/HEScheduler.h"
+#include "util/logging.h"
+
+#include <cassert>
+#include <sstream>
namespace onert
{
@@ -39,18 +41,15 @@ namespace compiler
LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &options) : _graph{graph}
{
- bool linear_executor = (options.executor == "Linear");
+ lowerGraph(options);
+}
+void LoweredGraph::lowerGraph(const CompilerOptions &options)
+{
// Build backend contexts
auto &backend_manager = BackendManager::get();
-
- // Always create Controlflow backend context
- auto cf_backend = backend_manager.getControlflow();
- _backend_contexts.emplace(
- cf_backend, cf_backend->newContext(_graph, _graph.getKernelBuilder(), linear_executor));
-
// Create contexts for other backends
- for (auto backend_str : options.backend_list)
+ for (auto &&backend_str : options.backend_list)
{
backend_manager.loadBackend(backend_str);
auto backend = backend_manager.get(backend_str);
@@ -60,12 +59,9 @@ LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &option
// we should change it back(throw if backend is not loaded) later.
if (!backend)
{
- VERBOSE(LoweredGraph) << "Cannot load backend - " << backend_str;
+ VERBOSE(LoweredGraph) << "Cannot load backend - " << backend_str << std::endl;
continue;
}
-
- _backend_contexts.emplace(
- backend, backend->newContext(_graph, _graph.getKernelBuilder(), linear_executor));
}
if (backend_manager.num_backends() == 0)
throw std::runtime_error{"No available backends loaded."};
@@ -73,317 +69,115 @@ LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &option
// TODO Move "schedule" phase out of here
// Schedule
std::unique_ptr<BackendResolver> backend_resolver;
+ auto all_backends = backend_manager.getAll();
if (options.he_scheduler)
{
- auto scheduler = HEScheduler(_backend_contexts, options);
+ auto scheduler = HEScheduler(all_backends, options);
backend_resolver = scheduler.schedule(_graph);
_indexed_ranks = scheduler.getIndexedRanks();
}
else
{
- auto scheduler = ManualScheduler(_backend_contexts, options);
+ auto scheduler = ManualScheduler(all_backends, options);
backend_resolver = scheduler.schedule(_graph);
}
- {
- // operand::LowerInfo holder
- ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> operands_lower_info;
-
- _graph.operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &) {
- operands_lower_info[index] = std::make_unique<ir::operand::LowerInfo>();
- });
-
- // Make op_seqs while checking whether a node can be merged into a op_seq.
- makeOpSequences(operands_lower_info, options, *backend_resolver);
+ makeLowerInfo(*backend_resolver);
+ VERBOSE(LoweredGraph) << "dump before mandatory passes" << std::endl;
+ dumper::text::dumpLoweredGraph(*this);
- _op_seqs.iterate([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- assert(op_seq.operations().size() > 0);
- std::reverse(std::begin(op_seq.operations()), std::end(op_seq.operations()));
- });
+ // Mandatory passes - roughly a legalization step
+ pass::PassRunner{}
+ .append(std::make_unique<pass::ConstantInsertionPass>(*this))
+ .append(std::make_unique<pass::ConstantLoweringPass>(*this))
+ .append(std::make_unique<pass::PermutationOperationPass>(*this))
+ .append(std::make_unique<pass::PermutationInsertionPass>(*this))
+ .run();
- VERBOSE(OpSequences) << "dump without permutation" << std::endl;
- dumpOpSequences(_op_seqs, _graph.operations());
+ dumpLowerInfo();
- pass::ConstantInsertionPass ci_pass(*this);
- ci_pass.run();
+ // Optimization passes (optional)
+ pass::PassRunner{}.append(std::make_unique<pass::PermutationEliminationPass>(*this)).run();
- pass::ConstantLoweringPass cl_pass(*this);
- cl_pass.run();
-
- // Set LowerInfo for each operand from the operand::LowerInfo holder
- manipulateLowerInfo(operands_lower_info, options.is_primary_subgraph);
-
- dumpLowerInfo();
- }
-
- // Run Permutation Passes
- {
- pass::PermutationOperationPass po_pass(*this);
- po_pass.run();
-
- pass::PermutationInsertionPass pi_pass(*this);
- pi_pass.run();
-
- pass::PermutationEliminationPass pe_pass(*this);
- pe_pass.run();
-
- VERBOSE(OpSequences) << "dump with permutation" << std::endl;
- dumpOpSequences(_op_seqs, _graph.operations());
- }
+ VERBOSE(LoweredGraph) << "Dump after all the passes" << std::endl;
+ for (auto &&operand : _graph.getInputs())
+ VERBOSE(LoweredGraph) << "Graph Input : " << operand << std::endl;
+ for (auto &&operand : _graph.getOutputs())
+ VERBOSE(LoweredGraph) << "Graph Output : " << operand << std::endl;
+ dumper::text::dumpLoweredGraph(*this);
// Graph verifications
{
+ assert(ir::verifier::InputOutputChecker().verify(_graph));
assert(ir::verifier::DAGChecker().verify(_graph));
- assert(ir::verifier::EdgeConsistencyChecker().verify(_graph));
+ assert(ir::verifier::EdgeChecker().verify(_graph));
}
}
-const ir::operation::LowerInfo *
-LoweredGraph::getLowerInfo(const ir::OpSequenceIndex &op_seq_index) const
+void LoweredGraph::makeLowerInfo(const compiler::BackendResolver &backend_resolver)
{
- auto itr = _lower_info_map.op_seq.find(op_seq_index);
- if (itr == _lower_info_map.op_seq.end())
- return nullptr;
- return itr->second.get();
-}
-
-void LoweredGraph::setLowerInfo(const ir::OpSequenceIndex &op_seq_index,
- std::unique_ptr<ir::operation::LowerInfo> &&lower_info)
-{
- _lower_info_map.op_seq.insert(std::make_pair(op_seq_index, std::move(lower_info)));
-}
+ _graph.operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &) {
+ lower_info().operand.set(index, std::make_unique<OperandLowerInfo>());
+ });
-void LoweredGraph::removeLowerInfo(const ir::OpSequenceIndex &op_seq_index)
-{
- auto &op_seq_lower_info = _lower_info_map.op_seq;
- assert(op_seq_lower_info.find(op_seq_index) != op_seq_lower_info.end());
- for (auto it = op_seq_lower_info.begin(); it != op_seq_lower_info.end(); ++it)
- {
- if (it->first == op_seq_index)
+ // Set operand lower info using assigned backends to operations
+ _graph.operations().iterate([&](const ir::OperationIndex &op_ind, const ir::IOperation &) {
+ const ir::IOperation &op = _graph.operations().at(op_ind);
+ auto backend = backend_resolver.getBackend(op_ind);
+ if (!backend)
{
- op_seq_lower_info.erase(it);
- break;
+ throw std::runtime_error{"Fail to find backend for " + op.name() + " operation"};
}
- }
-}
-
-const ir::operand::LowerInfo *LoweredGraph::getLowerInfo(const ir::OperandIndex &index) const
-{
- auto itr = _lower_info_map.operand.find(index);
- if (itr == _lower_info_map.operand.end())
- return nullptr;
- return itr->second.get();
-}
-
-ir::operand::LowerInfo *LoweredGraph::getLowerInfo(const ir::OperandIndex &index)
-{
- auto itr = _lower_info_map.operand.find(index);
- if (itr == _lower_info_map.operand.end())
- return nullptr;
- return itr->second.get();
-}
-
-void LoweredGraph::setLowerInfo(const ir::OperandIndex &index,
- std::unique_ptr<ir::operand::LowerInfo> &&lower_info)
-{
- _lower_info_map.operand.insert(std::make_pair(index, std::move(lower_info)));
-}
-
-void LoweredGraph::removeLowerInfo(const ir::OperandIndex &index)
-{
- _lower_info_map.operand.erase(index);
-}
-
-void LoweredGraph::iterateTopolOpSeqs(
- const std::function<void(const ir::OpSequenceIndex &, const ir::OpSequence &)> &fn) const
-{
- // Topological Sorting for ir::OpSequences
- std::vector<ir::OpSequenceIndex> topol_sorted;
- ir::PostDfsIterator<true>{}.iterateOpSeqs(
- *this, [&](const ir::OpSequenceIndex &index, const ir::OpSequence &) {
- topol_sorted.emplace_back(index);
- });
- std::reverse(topol_sorted.begin(), topol_sorted.end());
- for (const auto op_seq_idx : topol_sorted)
- {
- const auto &op_seq = _op_seqs.at(op_seq_idx);
- fn(op_seq_idx, op_seq);
- }
-}
-
-void LoweredGraph::iterateTopolOpSeqs(
- const std::function<void(const ir::OpSequenceIndex &, ir::OpSequence &)> &fn)
-{
- // Topological Sorting for ir::OpSequences
- std::vector<ir::OpSequenceIndex> topol_sorted;
- ir::PostDfsIterator<false>{}.iterateOpSeqs(
- *this, [&](const ir::OpSequenceIndex &index, ir::OpSequence &) {
- topol_sorted.emplace_back(index);
- });
- std::reverse(topol_sorted.begin(), topol_sorted.end());
- for (const auto op_seq_idx : topol_sorted)
- {
- auto &op_seq = _op_seqs.at(op_seq_idx);
- fn(op_seq_idx, op_seq);
- }
-}
-
-ir::OpSequenceIndex LoweredGraph::appendFreshSingleOpSequence(const ir::OperationIndex &node_index,
- const ir::Operation &node)
-{
- // Create a fresh op_seq with one operation, and append it to op_seqs
- // Create a fresh op_seq
- auto op_seq = std::make_unique<ir::OpSequence>(_graph.layout());
-
- // Add an operation
- op_seq->appendOperation(node_index);
-
- // Update input/output
- op_seq->setOutputs(node.getOutputs());
- op_seq->setInputs(node.getInputs());
-
- return _op_seqs.emplace(std::move(op_seq));
-}
-
-void LoweredGraph::makeOpSequences(
- ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
- const CompilerOptions &options, const BackendResolver &backend_resolver)
-{
- // if SUBG_MAX_NODE == 0, no limit on nodes of a op_seq
- const int op_seq_max_node = options.op_seq_max_node;
- assert(op_seq_max_node >= 0);
-
- bool is_profiling = options.he_profiling_mode;
- ir::OpSequence *op_seq = nullptr;
- ir::OpSequenceIndex op_seq_index;
-
- // NOTE: The below method appends nodes while making one op_seq if needed. If something better
- // ways, happy to update this code.
- ir::PostDfsConstIterator{}.iterate(
- _graph, [&](const ir::OperationIndex &node_index, const ir::Operation &node) {
- // LowerInfo for in/output operands
- auto backend = backend_resolver.getBackend(node_index);
-
- // Get frontend's layout
- auto frontend_layout = _graph.layout();
-
- // The layout of each backend should be set at another place
- // TODO Change setting layout of each backend at another place
- auto backend_layout = backend->config()->supportLayout(node, frontend_layout);
-
- for (auto operand : node.getInputs() | ir::Remove::UNDEFINED)
- {
- auto &&lower_info = operands_lower_info.at(operand);
- lower_info->addUsePermuteFactor(ir::operand::PermuteFactor{backend, backend_layout});
- }
- for (auto operand : node.getOutputs())
- {
- auto &&lower_info = operands_lower_info.at(operand);
- lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{backend, backend_layout});
- }
-
- bool new_op_seq = (op_seq == nullptr ||
- (op_seq_max_node != 0 &&
- op_seq->operations().size() >= static_cast<size_t>(op_seq_max_node)));
-
- // for profiling each op_seq must contain just one node,
- // so that we can measure a node separately
- if (new_op_seq || is_profiling ||
- !mergeable(op_seq_index, node_index, backend_layout, backend_resolver))
- {
- auto new_op_seq_index = appendFreshSingleOpSequence(node_index, node);
-
- // ir::OpSequence LowerInfo
- setLowerInfo(new_op_seq_index,
- std::make_unique<ir::operation::LowerInfo>(backend, backend_layout));
-
- op_seq_index = new_op_seq_index;
- op_seq = &(_op_seqs.at(new_op_seq_index));
-
- VERBOSE(Lower) << "OpSequence#" << op_seq_index.value() << " is created for "
- << "NODE#" << node_index.value() << "(" << node.name() << ")" << std::endl;
- }
- else
- {
- op_seq->appendOperation(node_index);
- // Set inputs
- auto new_inputs = node.getInputs();
- // Add inputs except outputs of the previous node
- for (auto ind : op_seq->getInputs())
- {
- if (!node.getOutputs().contains(ind))
- new_inputs.append(ind);
- }
- op_seq->setInputs(new_inputs);
- VERBOSE(Lower) << "OpSequence#" << op_seq_index.value() << " merges "
- << "NODE#" << node_index.value() << "(" << node.name() << ")" << std::endl;
- }
- });
-}
+ auto frontend_layout = _graph.layout();
-void LoweredGraph::manipulateLowerInfo(
- ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
- bool is_primary)
-{
- const auto controlflow_backend = BackendManager::get().getControlflow();
+ // The layout of each backend should be set at another place
+ // TODO Change setting layout of each backend at another place
+ auto backend_layout = backend->config()->supportLayout(op, frontend_layout);
- // TODO Rather than handling primary graph specially,
- // let the permute inserted and remove it later
- if (is_primary)
- {
- // TODO Rather than using NHWC Get frontend layout of this node from IR
- auto factor = ir::operand::PermuteFactor{controlflow_backend, ir::Layout::NHWC};
- for (auto index : _graph.getInputs() | ir::Remove::UNDEFINED)
+ for (auto &&ind : op.getInputs() | ir::Remove::UNDEFINED)
{
- auto &&lower_info = operands_lower_info.at(index);
- assert(lower_info->def_factors().empty());
- lower_info->addDefPermuteFactor(factor);
+ auto &operand_li = lower_info().operand.at(ind);
+ operand_li.addUsePermuteFactor(PermuteFactor{backend, backend_layout});
}
- for (auto index : _graph.getOutputs())
+ for (auto &&ind : op.getOutputs() | ir::Remove::UNDEFINED)
{
- auto &&lower_info = operands_lower_info.at(index);
- lower_info->addUsePermuteFactor(factor);
+ auto &operand_li = lower_info().operand.at(ind);
+ operand_li.addDefPermuteFactor(PermuteFactor{backend, backend_layout});
}
- }
- else
+ lower_info().operation.set(
+ op_ind, std::make_unique<compiler::OperationLowerInfo>(backend, backend_layout));
+ });
+
+ // Handle graph inputs and outputs
+ const auto builtin_backend = BackendManager::get().getBuiltin();
+ auto factor = PermuteFactor{builtin_backend, _graph.layout()};
+ for (auto &&index : _graph.getInputs() | ir::Remove::UNDEFINED)
{
- for (auto index : _graph.getInputs() | ir::Remove::UNDEFINED)
- {
- auto &&lower_info = operands_lower_info.at(index);
- if (!(lower_info->def_factors().size() == 0 && lower_info->use_factors().size() == 0))
- {
- // In case of not that Graph's input is not used in any operation and not the graph's
- // output.
- // In other words, it is not unused input in Graph.
- lower_info->addDefPermuteFactor(*lower_info->use_factors().begin());
- }
- else
- {
- // In case of that an operand is Graph's input and not input or output of any operation
- lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{
- controlflow_backend,
- ir::Layout::NHWC // TODO Get frontend layout of this node from IR
- });
- }
- }
+ auto &operand_li = lower_info().operand.at(index);
+ assert(operand_li.def_factors().empty());
+ operand_li.addDefPermuteFactor(factor);
}
- for (auto index : _graph.getOutputs())
+ for (auto &&index : _graph.getOutputs() | ir::Remove::UNDEFINED)
{
- auto &&lower_info = operands_lower_info.at(index);
- if (lower_info->def_factors().size() == 0)
- {
- // In case of that an operand is Graph's output and not input or output of any operation
- lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{
- controlflow_backend,
- ir::Layout::NHWC // TODO Get frontend layout of this node from IR
- });
- }
+ auto &operand_li = lower_info().operand.at(index);
+ operand_li.addUsePermuteFactor(factor);
}
- // Set LowerInfo for each operand from the operand::LowerInfo holder
- _graph.operands().iterate([&](const ir::OperandIndex &index, ir::Operand &) {
- setLowerInfo(index, std::move(operands_lower_info[index]));
+ // Handle variable tensors
+ _graph.operands().iterate([&](const ir::OperandIndex &index, ir::Operand &operand) {
+ // Some inputs of an operation may be non-constant yet appear neither in the graph
+ // inputs/outputs nor as undefined operands - these are variable tensors. For example,
+ // UnidirectionalSequenceLSTM has such inputs.
+ if (operand.info().isVariable())
+ {
+ // The variable operand with buffer is not supported yet
+ assert(operand.data() == nullptr);
+ assert(operand.getUses().size() == 1 && !operand.getDef().valid());
+ auto operand_li = lower_info().operand.at(index);
+ assert(operand_li.def_factors().empty());
+ operand_li.addDefPermuteFactor(operand_li.use_factors().getOnlyElement());
+ }
});
}
@@ -395,12 +189,22 @@ void LoweredGraph::dumpLowerInfo()
std::map<uint32_t, std::string> dumps;
_graph.operands().iterate([&](const ir::OperandIndex &index, ir::Operand &object) {
- std::stringstream sstream;
- if (!getLowerInfo(index)->def_factors().empty() || !getLowerInfo(index)->use_factors().empty())
+ const auto operand_lower_info = lower_info().operand.getRawPtr(index);
+ assert(operand_lower_info);
+ if (!operand_lower_info->def_factors().empty() || !operand_lower_info->use_factors().empty())
{
- auto factors_to_string = [](const ir::operand::PermuteFactorSet &factors) {
+ auto shape_to_string = [](const ir::Shape &shape) {
+ std::stringstream sstream;
+ sstream << "{ ";
+ for (auto i = 0; i < shape.rank(); ++i)
+ sstream << (shape.dim(i)) << " ";
+ sstream << "}";
+ return sstream.str();
+ };
+
+ auto factors_to_string = [](const PermuteFactorSet &factors) {
std::string str;
- for (auto factor : factors)
+ for (auto &&factor : factors)
{
str += factor.backend()->config()->id();
str += "(" + to_string(factor.layout()) + ")";
@@ -409,159 +213,45 @@ void LoweredGraph::dumpLowerInfo()
return "{ " + str + "}";
};
- auto operation_index_to_string = [](const ir::OperationIndexSet &operations) {
- std::string str;
- for (auto op : operations)
- {
- str += std::to_string(op.value());
- str += " ";
- }
- return "{ " + str + "}";
+ auto operation_index_set_to_string = [](const ir::OperationIndexSet &operations) {
+ std::stringstream sstream;
+ sstream << "{ ";
+ for (auto &&op : operations)
+ sstream << op << " ";
+ sstream << "}";
+ return sstream.str();
+ };
+
+ auto data_to_str = [](const ir::Data *data) {
+ return (data ? (std::to_string(data->size()) + " bytes") : "N/A");
};
- const auto lower_info = getLowerInfo(index);
- const auto &shape = object.shape();
- std::string def_ops =
- object.getDef().valid() ? std::to_string(object.getDef().value()) : "N/A";
- std::string use_ops = operation_index_to_string(object.getUses());
- std::string def_layouts = factors_to_string(lower_info->def_factors());
- std::string use_layouts = factors_to_string(lower_info->use_factors());
- sstream << "Operand #" << index.value() << " LowerInfo" << std::endl;
- sstream << " - Shape : { ";
- for (auto i = 0; i < shape.rank(); ++i)
- {
- sstream << (shape.dim(i)) << " ";
- }
- sstream << "}" << std::endl;
- sstream << " - Def ir::Operations : " << def_ops << std::endl;
- sstream << " - Use ir::Operations : " << use_ops << std::endl;
- sstream << " - Lower Info" << std::endl;
- sstream << " - Def Backends : " << def_layouts << std::endl;
- sstream << " - Use Backends : " << use_layouts << std::endl;
+ std::string shape_str = shape_to_string(object.shape());
+ std::string def_op = operation_index_set_to_string({object.getDef()});
+ std::string use_ops = operation_index_set_to_string(object.getUses());
+ std::string def_factors = factors_to_string(operand_lower_info->def_factors());
+ std::string use_factors = factors_to_string(operand_lower_info->use_factors());
+ std::stringstream sstream;
+ sstream << "Operand " << index << " Info" << std::endl;
+ sstream << " - Shape : " << shape_str << std::endl;
+ sstream << " - Def/Uses : Def " << def_op << " Uses " << use_ops << std::endl;
+ sstream << " - Data : " << data_to_str(object.data()) << std::endl;
+ sstream << " - LowerInfo : Def " << def_factors << " Uses " << use_factors << std::endl;
+ dumps.emplace(index.value(), sstream.str());
}
- dumps.emplace(index.value(), sstream.str());
});
for (const auto &e : dumps)
{
if (!e.second.empty())
{
- VERBOSE(Lower) << e.second;
+ std::istringstream iss(e.second);
+ std::string line;
+ while (std::getline(iss, line))
+ VERBOSE(Lower) << line << std::endl;
}
}
}
-bool LoweredGraph::mergeable(const ir::OpSequenceIndex &op_seq_index,
- const ir::OperationIndex &node_index, ir::Layout layout,
- const BackendResolver &backend_resolver)
-{
- // Are they mergeable?
- // 1. the same backend id and layout?
- // 2. Is op_seq or node branched?
- // 3. if 1 is true, the op_seq and a node are connected?
- const auto &op_seq = _op_seqs.at(op_seq_index);
- const auto &node = _graph.operations().at(node_index);
-
- // The same backend id and layout?
- {
- const auto op_seq_backend_layout = getLowerInfo(op_seq_index)->layout();
- const auto &op_seq_backend_id = getLowerInfo(op_seq_index)->backend()->config()->id();
- const auto &node_backend_id = backend_resolver.getBackend(node_index)->config()->id();
- VERBOSE(Lower) << "OpSequence#" << op_seq_index.value() << " { " << op_seq_backend_id << "("
- << to_string(op_seq_backend_layout) << ") } "
- << " NODE#" << node_index.value() << " (" << node.name() << ") { "
- << node_backend_id << "(" << to_string(layout) << ") } " << std::endl;
- if (op_seq_backend_id != node_backend_id || op_seq_backend_layout != layout)
- return false;
- }
-
- // Branched?
- {
- std::unordered_set<ir::OperationIndex> branched_set;
-
- // Check for branching up
- for (const auto &input : op_seq.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
- {
- const auto &input_obj = _graph.operands().at(input);
- auto def = input_obj.getDef();
- if (def.valid())
- {
- branched_set.insert(def);
- if (branched_set.size() > 1)
- {
- return false;
- }
- }
- }
- branched_set.clear();
-
- // Check for branching down
- for (const auto &output : node.getOutputs() | ir::Remove::DUPLICATED)
- {
- // TODO Fix this workaround for the case of model outputs that are used by another operation
- // This is needed since the branching is decided by operation, but for model outputs,
- // there is controlflow backen(use backend) but no actual use operation exists
- if (_graph.getOutputs().contains(output))
- return false;
-
- const auto &output_obj = _graph.operands().at(output);
- for (const auto &use : output_obj.getUses())
- {
- branched_set.insert(use);
- if (branched_set.size() > 1)
- {
- return false;
- }
- }
- }
- }
-
- // Connected?
- // an input of one node is an output of the other node? or vice-versa?
- {
- const auto &node_inputs = node.getInputs();
- const auto &node_outputs = node.getOutputs();
-
- // op_seq's operations are in order so that we just check the first and the last
- std::vector<ir::OperationIndex> op_seq_ops{op_seq.operations()[0]};
- if (op_seq.operations().size() > 1)
- op_seq_ops.emplace_back(op_seq.operations()[op_seq.operations().size() - 1]);
-
- for (const auto &n_index : op_seq_ops)
- {
- const auto &n = _graph.operations().at(n_index);
-
- // node's output == op_seq's input?
- for (const auto input : n.getInputs() | ir::Remove::UNDEFINED)
- {
- if (node_outputs.contains(input))
- {
- VERBOSE(Lower) << "OpSequence#" << op_seq_index.value() << " 's NODE#" << n_index.value()
- << "(" << n.name() << ") is connected to NODE#" << node_index.value()
- << "(" << node.name() << ")" << std::endl;
- return true;
- }
- }
-
- // node's input == op_seq's output?
- for (const auto output : n.getOutputs())
- {
- if (node_inputs.contains(output))
- {
- VERBOSE(Lower) << "OpSequence#" << op_seq_index.value() << " 's NODE#" << n_index.value()
- << " (" << n.name() << ") is connected to NODE#" << node_index.value()
- << std::endl;
- return true;
- }
- }
- }
-
- VERBOSE(Lower) << "OpSequence#" << op_seq_index.value() << " is not connected to NODE#"
- << node_index.value() << "(" << node.name() << ")" << std::endl;
- }
-
- return false;
-}
-
} // namespace compiler
} // namespace onert
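
One visible change in the rewritten lowering flow above is that the ad-hoc sequence of pass objects is replaced by a chained pass::PassRunner, split into a mandatory (legalization-like) group and an optional optimization group. The pattern itself is small; a generic sketch of such a runner, purely illustrative and not the real pass::PassRunner or Pass interfaces, is:

#include <memory>
#include <vector>

// Minimal pass interface: each pass transforms some shared graph state when run.
struct IPass
{
  virtual ~IPass() = default;
  virtual void run() = 0;
};

// Collects passes with append() and executes them in insertion order.
class PassRunnerSketch
{
public:
  PassRunnerSketch &append(std::unique_ptr<IPass> pass)
  {
    _passes.push_back(std::move(pass));
    return *this; // enables chaining: runner.append(...).append(...).run();
  }

  void run()
  {
    for (auto &pass : _passes)
      pass->run();
  }

private:
  std::vector<std::unique_ptr<IPass>> _passes;
};
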
diff --git a/runtime/onert/core/src/compiler/ManualScheduler.cc b/runtime/onert/core/src/compiler/ManualScheduler.cc
index ed49ee56f..ccd08893f 100644
--- a/runtime/onert/core/src/compiler/ManualScheduler.cc
+++ b/runtime/onert/core/src/compiler/ManualScheduler.cc
@@ -29,9 +29,9 @@ namespace onert
namespace compiler
{
-ManualScheduler::ManualScheduler(const backend::BackendContexts &backend_contexts,
+ManualScheduler::ManualScheduler(const std::vector<const backend::Backend *> &backends,
const compiler::CompilerOptions &options)
- : _backend_contexts{backend_contexts}, _options{options}
+ : _backends{backends}, _options{options}
{
}
@@ -42,7 +42,7 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap
// This fallback will be used in case that `backend_for_all` is unavailable
auto fallback = [&]() -> const backend::Backend * {
- for (auto backend_id : _options.backend_list)
+ for (auto &&backend_id : _options.backend_list)
{
auto backend = resolveBackend(backend_id);
if (backend)
@@ -58,20 +58,20 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap
VERBOSE(ManualScheduler) << "Default backend for all ops: " << backend_all->config()->id()
<< std::endl;
- graph.operations().iterate([&](const ir::OperationIndex &index, const ir::Operation &) {
+ graph.operations().iterate([&](const ir::OperationIndex &index, const ir::IOperation &) {
backend_resolver->setBackend(index, backend_all);
});
// 2. Backend per operation type
std::unordered_map<ir::OpCode, backend::Backend *> op_type_map;
- for (auto &pair : manual_options.opcode_to_backend)
+ for (const auto &pair : manual_options.opcode_to_backend)
{
op_type_map.emplace(pair.first, BackendManager::get().get(pair.second));
}
// By default, Custom uses cpu backend
op_type_map[ir::OpCode::Custom] = BackendManager::get().get("cpu");
- graph.operations().iterate([&](const ir::OperationIndex &index, const ir::Operation &operation) {
+ graph.operations().iterate([&](const ir::OperationIndex &index, const ir::IOperation &operation) {
auto itr = op_type_map.find(operation.opcode());
if (itr != op_type_map.end())
{
@@ -80,7 +80,7 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap
});
// 3. Backend per operation
- for (auto &pair : manual_options.index_to_backend)
+ for (const auto &pair : manual_options.index_to_backend)
{
const auto &key = pair.first;
const auto &val = pair.second;
@@ -88,22 +88,21 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap
try
{
graph.operations().at(key); // Check if exist, or this will throw
- backend_resolver->setBackend(
- key, BackendManager::get().get(
- val)); // TODO Ensure this backend is available in backend contexts
+ backend_resolver->setBackend(key, BackendManager::get().get(val));
}
catch (...)
{
- VERBOSE(ManualScheduler) << "Invalid value while OperationIndex to Backend mapping : @"
- << key.value() << " -> \"" << val << "\"" << std::endl;
+ VERBOSE(ManualScheduler) << "Invalid value while mapping OperationIndex to Backend: @" << key
+ << " -> \"" << val << "\"" << std::endl;
}
}
// Dump final assignment
- backend_resolver->iterate([&](const ir::OperationIndex &index, const backend::Backend &backend) {
- VERBOSE(ManualScheduler) << "backend for operation #" << index.value() << ": "
- << backend.config()->id() << std::endl;
- });
+ WHEN_LOG_ENABLED(backend_resolver->iterate(
+ [&](const ir::OperationIndex &index, const backend::Backend &backend) {
+ VERBOSE(ManualScheduler) << "backend for " << index << ": " << backend.config()->id()
+ << std::endl;
+ }));
return backend_resolver;
}
@@ -113,7 +112,7 @@ const backend::Backend *ManualScheduler::resolveBackend(const std::string &id,
{
// Ensure if the backend is available in the current backend context
const backend::Backend *backend = BackendManager::get().get(id);
- if (!backend || _backend_contexts.find(backend) == _backend_contexts.end())
+ if (!backend || std::find(_backends.begin(), _backends.end(), backend) == _backends.end())
{
backend = fallback;
}
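
ManualScheduler::schedule above applies three assignment levels, each overriding the previous: a default backend for every operation, then a backend per operation type (opcode), then a backend per individual operation index. A condensed sketch of that precedence with hypothetical stand-in types (not the actual BackendResolver API):

#include <cstdint>
#include <string>
#include <unordered_map>
#include <vector>

using OpIndex = uint32_t;
using OpCode = int;

struct OpInfo
{
  OpIndex index;
  OpCode opcode;
};

// Resolve a backend id for each operation; later levels win over earlier ones.
std::unordered_map<OpIndex, std::string>
assignBackends(const std::vector<OpInfo> &ops, const std::string &default_backend,
               const std::unordered_map<OpCode, std::string> &per_opcode,
               const std::unordered_map<OpIndex, std::string> &per_index)
{
  std::unordered_map<OpIndex, std::string> result;

  // 1. Default backend for all operations
  for (const auto &op : ops)
    result[op.index] = default_backend;

  // 2. Backend per operation type
  for (const auto &op : ops)
  {
    auto it = per_opcode.find(op.opcode);
    if (it != per_opcode.end())
      result[op.index] = it->second;
  }

  // 3. Backend per individual operation index
  for (const auto &pair : per_index)
    result[pair.first] = pair.second;

  return result;
}
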
diff --git a/runtime/onert/core/src/compiler/ManualScheduler.h b/runtime/onert/core/src/compiler/ManualScheduler.h
index 41503f7ff..18732d744 100644
--- a/runtime/onert/core/src/compiler/ManualScheduler.h
+++ b/runtime/onert/core/src/compiler/ManualScheduler.h
@@ -28,7 +28,7 @@ namespace compiler
class ManualScheduler : public IScheduler
{
public:
- ManualScheduler(const backend::BackendContexts &backend_contexts,
+ ManualScheduler(const std::vector<const backend::Backend *> &backends,
const compiler::CompilerOptions &options);
std::unique_ptr<BackendResolver> schedule(const ir::Graph &graph) override;
@@ -37,7 +37,7 @@ private:
const backend::Backend *fallback = nullptr);
private:
- const backend::BackendContexts &_backend_contexts;
+ std::vector<const backend::Backend *> _backends;
compiler::CompilerOptions _options;
};
diff --git a/runtime/onert/core/src/compiler/MultiModelCompiler.cc b/runtime/onert/core/src/compiler/MultiModelCompiler.cc
new file mode 100644
index 000000000..141fdfe09
--- /dev/null
+++ b/runtime/onert/core/src/compiler/MultiModelCompiler.cc
@@ -0,0 +1,242 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MultiModelCompiler.h"
+
+#include "CompilerHelpers.h"
+#include "ExecutorFactory.h"
+#include "ShapeValidator.h"
+#include "pass/ConstantOutputPass.h"
+#include "pass/OddOutputPass.h"
+#include "pass/PassRunner.h"
+#include "pass/UnusedOperandEliminationPass.h"
+#include "../dumper/dot/DotDumper.h"
+#include "../exec/Executors.h"
+#include "../ir/OperationDumper.h"
+#include "../ir/verifier/Verifier.h"
+
+#include "compiler/StaticShapeInferer.h"
+
+#include <misc/string_helpers.h>
+#include <misc/polymorphic_downcast.h>
+
+namespace onert
+{
+namespace compiler
+{
+
+MultiModelCompiler::MultiModelCompiler(const std::shared_ptr<ir::NNPkg> &nnpkg,
+ std::vector<std::unique_ptr<CompilerOptions>> &copts)
+ : _nnpkg{nnpkg}, _voptions{}
+{
+ assert(nnpkg->model_count() != 1);
+
+ for (uint32_t i = 0; i < copts.size(); i++)
+ {
+ _voptions.push_back(copts[i].get());
+ }
+}
+
+std::shared_ptr<CompilerArtifact> MultiModelCompiler::compile(void)
+{
+ /***************************************************
+ * Prepare compilation phase
+ ***************************************************/
+ for (auto &&options : _voptions)
+ {
+ if (!options)
+ throw std::runtime_error{"Empty compile option"};
+
+ // Mode check
+ // TODO handle option for each model
+ if (options->he_profiling_mode)
+ throw std::runtime_error("NYI: Profiling mode for multiple model is not supported yet");
+
+ if (!options->minmax_filepath.empty())
+ throw std::runtime_error("Recording minmax is not supported for multiple models");
+
+ options->forceInternalOptions();
+ options->verboseOptions();
+ }
+
+ // NYI: allow one model compilation
+ auto const model_count = _nnpkg->model_count();
+ if (model_count != _voptions.size())
+ throw std::runtime_error{"Model count and option vector size mismatch"};
+
+ for (uint16_t i = 0; i < model_count; i++)
+ {
+ if (!_nnpkg->model(ir::ModelIndex{i})->hasOnly<ir::Graph>())
+ throw std::runtime_error("MultiModelCompiler can only compile models for inference.");
+ }
+
+ for (uint16_t i = 0; i < model_count; i++)
+ {
+ _nnpkg->model(ir::ModelIndex{i})->iterate([&](const ir::SubgraphIndex &, ir::IGraph &graph) {
+ auto &subg = nnfw::misc::polymorphic_downcast<ir::Graph &>(graph);
+
+ // Mandatory passes
+ pass::PassRunner{}
+ .append(std::make_unique<pass::ConstantOutputPass>(subg))
+ .append(std::make_unique<pass::OddOutputPass>(subg))
+ .run();
+
+ // Optimizations
+ pass::PassRunner{}.append(std::make_unique<pass::UnusedOperandEliminationPass>(subg)).run();
+ });
+ }
+
+ /***************************************************
+ * Backend independent analysis & optimization phase
+ ***************************************************/
+ // TODO Handle dump level for each model
+ auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_voptions[0]->graph_dump_level);
+ onert::dumper::dot::DotDumper dot_dumper(dump_level);
+
+ // Tracing context
+ // TODO Support tracing_ctx for multiple models
+ std::unique_ptr<util::TracingCtx> tracing_ctx = nullptr;
+
+ // Model edge context: copy model edge context
+ auto model_edges = std::make_unique<ir::ModelEdges>(_nnpkg->model_edges());
+
+ // Custom kernels
+ std::unordered_map<ir::ModelIndex, std::shared_ptr<backend::custom::IKernelBuilder>>
+ custom_kernel_builders;
+ for (uint16_t i = 0; i < model_count; i++)
+ {
+ auto const model_index = ir::ModelIndex{i};
+ custom_kernel_builders[model_index] = _nnpkg->model(model_index)->getKernelBuilder();
+ }
+
+ // Lower: Assign backend
+ std::unordered_map<ir::ModelIndex,
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>>
+ lowered_subgs;
+
+ for (uint16_t i = 0; i < model_count; i++)
+ {
+ auto const model_index = ir::ModelIndex{i};
+ auto model = _nnpkg->model(model_index);
+
+ model->iterate([&](const ir::SubgraphIndex &subg_index, ir::IGraph &graph) {
+ auto &subg = nnfw::misc::polymorphic_downcast<ir::Graph &>(graph);
+
+ dot_dumper.dump(subg,
+ nnfw::misc::str("before_lower_model-", i, "-subg-", subg_index.value()));
+ // Lower: Assign backend
+ lowered_subgs[model_index][subg_index] =
+ std::make_unique<compiler::LoweredGraph>(subg, *_voptions[i]);
+ // Set tracing_ctx for copied graph
+ if (tracing_ctx != nullptr)
+ tracing_ctx->setSubgraphIndex(&(lowered_subgs[model_index][subg_index]->graph()),
+ subg_index.value());
+ });
+ }
+
+ _nnpkg.reset();
+
+ for (const auto &pair : lowered_subgs)
+ {
+ const auto &model_index = pair.first;
+ const auto &model_lsubg = pair.second;
+
+ for (const auto &pair_inner : model_lsubg)
+ {
+ const auto &subg_index = pair_inner.first;
+ const auto &lowered_subg = pair_inner.second;
+ dot_dumper.dump(*lowered_subg, nnfw::misc::str("after_lower_model-", model_index.value(),
+ "-subg-", subg_index.value()));
+ }
+ }
+
+ // Shape inference.
+ for (auto &&pair : lowered_subgs)
+ {
+ auto &model_lsubgs = pair.second;
+ // Run the StaticShapeInferer of the primary subgraph. All child StaticShapeInferers are
+ // called recursively
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers =
+ createStaticShapeInferers(model_lsubgs);
+
+ const auto primary_subg_idx = ir::SubgraphIndex{0};
+ inferers.at(primary_subg_idx)->infer();
+
+ for (const auto &pair_inferer : inferers)
+ {
+ const auto inferer = pair_inferer.second.get();
+ inferer->dump();
+ }
+ }
+
+ // Shape validation
+ // TODO Move shape-independent feature checks from ShapeValidator to OperationValidator
+ // TODO Move ShapeValidator into shape inference
+ // - Validate input tensor shapes
+ // - Validate parameter values whose valid range depends on the input tensor shape
+ // - Output tensor shape validation is unnecessary because the
+ // static/dynamic shape inferer produces valid output shapes
+ for (const auto &pair : lowered_subgs)
+ {
+ const auto &model_lsubgs = pair.second;
+
+ for (const auto &pair_inner : model_lsubgs)
+ {
+ const auto &lowered_subg = pair_inner.second;
+ compiler::ShapeValidator{lowered_subg->graph()}();
+ }
+ }
+
+ /*************************************************************
+ * Backend independent analysis & optimization phase finished
+ *************************************************************/
+ auto executors = std::make_shared<exec::Executors>(std::move(model_edges));
+ for (auto &&pair : lowered_subgs)
+ {
+ auto const &model_index = pair.first;
+ auto &model_lsubgs = pair.second;
+
+ for (auto &&pair_inner : model_lsubgs)
+ {
+ auto const subg_index = pair_inner.first;
+ auto &lowered_subg = pair_inner.second;
+ auto const indexed_ranks = lowered_subg->indexed_ranks();
+
+ ir::OperationDumper dumper("Executor generation of Subgraph " +
+ std::to_string(subg_index.value()));
+ lowered_subg->graph().operations().iterate(
+ [&](const ir::OperationIndex &, const ir::IOperation &op) { op.accept(dumper); });
+
+ ExecutorFactoryArgs args;
+ args.tracing_ctx = tracing_ctx.get();
+ args.options = _voptions[model_index.value()];
+ args.model_index = model_index;
+ args.custom_kernel_builder = custom_kernel_builders[model_index];
+ auto executor = std::unique_ptr<exec::IExecutor>{
+ ExecutorFactory::get().create(std::move(lowered_subg), executors, args)};
+ executor->setIndexedRanks(indexed_ranks);
+ executors->emplace(model_index, subg_index, std::move(executor));
+ }
+ }
+
+ /********************************
+ * Code generation phase finished
+ ********************************/
+ return std::make_shared<CompilerArtifact>(executors, std::move(tracing_ctx));
+}
+
+} // namespace compiler
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/MultiModelCompiler.h b/runtime/onert/core/src/compiler/MultiModelCompiler.h
new file mode 100644
index 000000000..b282a5087
--- /dev/null
+++ b/runtime/onert/core/src/compiler/MultiModelCompiler.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file MultiModelCompiler.h
+ * @brief This file contains the MultiModelCompiler class to define and run the compilation phase
+ */
+
+#ifndef __ONERT_COMPILER_MULTI_MODEL_COMPILER_H__
+#define __ONERT_COMPILER_MULTI_MODEL_COMPILER_H__
+
+#include "compiler/CompilerOptions.h"
+#include "compiler/ICompiler.h"
+#include "ir/NNPkg.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+/**
+ * @brief Class to compile NN package
+ */
+class MultiModelCompiler final : public ICompiler
+{
+public:
+ /**
+ * @brief Construct a new Compiler object for NN package
+ * @param[in] nnpkg NN package to compile
+ * @param[in] copts Compiler option vector for each model in the package
+ */
+ MultiModelCompiler(const std::shared_ptr<ir::NNPkg> &nnpkg,
+ std::vector<std::unique_ptr<CompilerOptions>> &copts);
+
+ /**
+ * @brief Destroy the MultiModelCompiler object
+ */
+ ~MultiModelCompiler() = default;
+
+public:
+ /**
+ * @brief Do compilation with the options
+ *
+ * @return std::shared_ptr<CompilerArtifact> Executors as a result of compilation
+ */
+ std::shared_ptr<CompilerArtifact> compile(void);
+
+private:
+ std::shared_ptr<ir::NNPkg> _nnpkg;
+ std::vector<CompilerOptions *> _voptions;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_MULTI_MODEL_COMPILER_H__
diff --git a/runtime/onert/core/src/ir/operation/LowerInfo.cc b/runtime/onert/core/src/compiler/OperationLowerInfo.cc
index 249918bd6..e8a438130 100644
--- a/runtime/onert/core/src/ir/operation/LowerInfo.cc
+++ b/runtime/onert/core/src/compiler/OperationLowerInfo.cc
@@ -14,21 +14,18 @@
* limitations under the License.
*/
-#include "ir/operation/LowerInfo.h"
+#include "compiler/OperationLowerInfo.h"
namespace onert
{
-namespace ir
-{
-namespace operation
+namespace compiler
{
-LowerInfo::LowerInfo(const backend::Backend *backend, Layout layout)
- : _permute_factor{backend, layout}
+OperationLowerInfo::OperationLowerInfo(const backend::Backend *backend, ir::Layout layout)
+ : _permute_factor{backend, layout}
{
// DO NOTHING
}
-} // namespace operation
-} // namespace ir
+} // namespace compiler
} // namespace onert
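
OperationLowerInfo, as moved above, is essentially a thin wrapper around a PermuteFactor, i.e. a (backend, layout) pair. Operands collect one set of such factors for their defs and one for their uses (see makeLowerInfo earlier in this diff), and a Permute is inserted wherever the two sets disagree. A simplified illustration with hypothetical stand-in types, not the real headers:

#include <set>
#include <string>
#include <tuple>

enum class Layout
{
  NHWC,
  NCHW,
  UNKNOWN
};

// A (backend, layout) pair: where an operand is produced or consumed, and in which layout.
struct PermuteFactorSketch
{
  std::string backend_id;
  Layout layout;

  bool operator<(const PermuteFactorSketch &rhs) const
  {
    return std::tie(backend_id, layout) < std::tie(rhs.backend_id, rhs.layout);
  }
};

// Per-operand bookkeeping: if def_factors and use_factors differ, a Permute
// (layout conversion and/or backend-to-backend copy) must be inserted between them.
struct OperandLowerInfoSketch
{
  std::set<PermuteFactorSketch> def_factors;
  std::set<PermuteFactorSketch> use_factors;
};
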
diff --git a/runtime/onert/core/src/compiler/OperationValidator.cc b/runtime/onert/core/src/compiler/OperationValidator.cc
deleted file mode 100644
index f7f659e3e..000000000
--- a/runtime/onert/core/src/compiler/OperationValidator.cc
+++ /dev/null
@@ -1,1053 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationValidator.h"
-
-#include <typeinfo>
-
-#include "ir/Graph.h"
-#include "ir/operation/LowerInfo.h"
-
-#include "util/logging.h"
-#include "util/Utils.h"
-
-#define OP_REQUIRES(EXP) \
- do \
- { \
- if (!(EXP)) \
- throw std::runtime_error("OperationValidator failed at line " + std::to_string(__LINE__)); \
- } while (0)
-
-namespace onert
-{
-namespace compiler
-{
-
-OperationValidator::OperationValidator(const ir::Graph &graph)
- : _graph{graph}, _ctx{graph.operands()}, _current_op_seq_layout{ir::Layout::UNKNOWN}
-{
-}
-
-void OperationValidator::checkUnaryOp(const ir::Operation &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(0)};
-
- // Check if I/O types match
- OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
-
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- // Check if I/O shapes match
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
-}
-
-void OperationValidator::operator()()
-{
- // There is no reason for each subgraph to have subgraphs since compiler has subgraphs when
- // creating Compiler
- assert(_graph.subgraphs() == nullptr);
-
- _current_op_seq_layout = _graph.layout();
-
- _graph.operations().iterate(
- [&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); });
-}
-
-void OperationValidator::visit(const ir::operation::BatchMatMul &node)
-{
- const auto lhs_index(node.getInputs().at(ir::operation::BatchMatMul::Input::LHS));
- const auto rhs_index(node.getInputs().at(ir::operation::BatchMatMul::Input::RHS));
- const auto out_index{node.getOutputs().at(0)};
-
- // Constant lhs and rhs is not implemented yet
- OP_REQUIRES(!_ctx.at(lhs_index).isConstant() && !_ctx.at(rhs_index).isConstant());
-
- if (_ctx.at(out_index).info().isDynamic())
- return;
-
- OP_REQUIRES(_ctx.at(lhs_index).shape().rank() <= 4);
- OP_REQUIRES(_ctx.at(rhs_index).shape().rank() <= 4);
- OP_REQUIRES(_ctx.at(lhs_index).shape().rank() >= 2);
- OP_REQUIRES(_ctx.at(rhs_index).shape().rank() >= 2);
-}
-
-void OperationValidator::visit(const ir::operation::BatchToSpaceND &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
- return;
-
- const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)};
- const auto block_size_index{
- node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
-
- const auto frontend_layout = _current_op_seq_layout;
- const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
- const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
-
- // All requirement as per NNAPI specification.
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(block_size_index).shape().rank() == 1);
-
- OP_REQUIRES(_ctx.at(block_size_index).shape().dim(0) == 2);
-
- OP_REQUIRES(_ctx.at(block_size_index).isConstant());
-
- OP_REQUIRES(input_shape.C == output_shape.C);
-}
-
-void OperationValidator::visit(const ir::operation::Comparison &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- // This validator does not check shape. So checking isDynamic() is skipped.
-
- const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
- const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
-
- OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
- OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::BOOL8);
-}
-
-void OperationValidator::visit(const ir::operation::Softmax &node)
-{
- VERBOSE(Softmax) << "Configure SOFTMAX operation" << std::endl;
-
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(0)};
-
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
-}
-
-void OperationValidator::visit(const ir::operation::InstanceNorm &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
- return;
-
- const auto ifm_index{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)};
- const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
- const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
-
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(ifm_index).shape() == _ctx.at(ofm_index).shape());
- OP_REQUIRES(_ctx.at(gamma_index).shape().rank() == 1);
- OP_REQUIRES(_ctx.at(beta_index).shape().rank() == 1);
-}
-
-void OperationValidator::visit(const ir::operation::Pool2D &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
- return;
-
- const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};
-
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
-}
-
-void OperationValidator::visit(const ir::operation::Permute &node)
-{
- VERBOSE(Permute) << "Configure Permute operation" << std::endl;
-
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(0)};
-
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
-}
-
-void OperationValidator::visit(const ir::operation::Reduce &node)
-{
- VERBOSE(Permute) << "Configure " + node.name() + " operation" << std::endl;
-
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
- const auto input_shape = _ctx.at(input_index).shape();
- const auto output_shape = _ctx.at(output_index).shape();
-
- OP_REQUIRES(input_shape.rank() <= 4);
- OP_REQUIRES(output_shape.rank() <= input_shape.rank());
-
- // NOTE For the 4-dimensions, if the rank of input and output are different, this runtime only
- // supports cases reducing height and width or reducing depth.
- // TODO We have to support all cases of dimensions up to 4.
- // For correct permuting, we have to set output's shape to be equal in dimension position of the
- // input. But the positions of the same dimensions in the input and output may be set differently.
- // For example {2,3,4,5}(input's shape) can be reduced to {3,5}(output's shape). The original
- // output shape should be {1,3,1,5}, but real output shape may be {3,5}. If you simply try to
- // extend it in 4 dimensions, it should be {1,1,3,5}.
- // Even if output shape is changed to {1,3,1,5}, there is another problem. It is that shape of
- // output tensor used at next operation is changed to {1,3,1,5} after this operation even if the
- // next operation is not desired.
- if (input_shape.rank() == 4 && input_shape.rank() != output_shape.rank())
- {
- if (output_shape.rank() == 2)
- {
- // Reducing HW
- OP_REQUIRES(input_shape.dim(0) == output_shape.dim(0) &&
- input_shape.dim(3) == output_shape.dim(1));
- }
- else if (output_shape.rank() == 3)
- {
- // Reducing C or
- // (Reducing H and C(input and output) == 1) or (Reducing W and C(input and output) == 1)
- OP_REQUIRES((input_shape.dim(0) == output_shape.dim(0) &&
- input_shape.dim(1) == output_shape.dim(1) &&
- input_shape.dim(2) == output_shape.dim(2)) ||
- (input_shape.dim(0) == output_shape.dim(0) &&
- (input_shape.dim(1) == output_shape.dim(1) ||
- input_shape.dim(2) == output_shape.dim(1)) &&
- input_shape.dim(3) == 1 && output_shape.dim(2) == 1));
- }
- }
-}
-
-void OperationValidator::visit(const ir::operation::Transpose &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
- const auto &perm{node.param().perm};
-
- const auto &output_shape = _ctx.at(output_index).shape();
- const auto &input_shape = _ctx.at(input_index).shape();
-
- OP_REQUIRES(input_shape.rank() == static_cast<int>(perm.size()));
- OP_REQUIRES(input_shape.rank() == output_shape.rank());
-}
-
-void OperationValidator::visit(const ir::operation::RNN &node)
-{
- // NOTE This validation is for static rnn(non-dynamic shape), but not for dynamic rnn
- // TODO Support dynamic rnn
- const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto hidden_state_out_index{
- node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};
-
- const auto input_index{node.getInputs().at(ir::operation::RNN::Input::INPUT)};
- const auto weights_index{node.getInputs().at(ir::operation::RNN::Input::WEIGHTS)};
- const auto recurrent_weights_index{
- node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
- const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)};
- const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)};
-
- const auto batch_size = _ctx.at(output_index).shape().dim(0);
- const auto num_units = _ctx.at(output_index).shape().dim(1);
-
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == 2 &&
- _ctx.at(hidden_state_out_index).shape().rank() == 2 &&
- _ctx.at(input_index).shape().rank() == 2 &&
- _ctx.at(weights_index).shape().rank() == 2 &&
- _ctx.at(recurrent_weights_index).shape().rank() == 2 &&
- _ctx.at(hidden_state_in_index).shape().rank() == 2);
- OP_REQUIRES(_ctx.at(bias_index).shape().rank() == 1);
-
- OP_REQUIRES(batch_size == _ctx.at(input_index).shape().dim(0) &&
- batch_size == _ctx.at(hidden_state_in_index).shape().dim(0) &&
- batch_size == _ctx.at(hidden_state_out_index).shape().dim(0));
- OP_REQUIRES(_ctx.at(input_index).shape().dim(1) == _ctx.at(weights_index).shape().dim(1));
-
- OP_REQUIRES(num_units == _ctx.at(weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_weights_index).shape().dim(0) &&
- num_units == _ctx.at(bias_index).shape().dim(0));
- OP_REQUIRES(num_units == _ctx.at(output_index).shape().dim(1) &&
- num_units == _ctx.at(recurrent_weights_index).shape().dim(1) &&
- num_units == _ctx.at(hidden_state_in_index).shape().dim(1) &&
- num_units == _ctx.at(hidden_state_out_index).shape().dim(1));
-}
-
-void OperationValidator::visit(const ir::operation::SpaceToBatchND &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
- return;
-
- const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
- const auto block_size_index{
- node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
- const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
-
- const auto frontend_layout = _current_op_seq_layout;
- const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
- const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
-
- // All requirement as per NNAPI specification.
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(block_size_index).shape().rank() == 1);
- OP_REQUIRES(_ctx.at(paddings_index).shape().rank() == 2);
-
- OP_REQUIRES(_ctx.at(block_size_index).shape().dim(0) == 2);
- OP_REQUIRES(_ctx.at(paddings_index).shape().dim(0) == 2);
- OP_REQUIRES(_ctx.at(paddings_index).shape().dim(1) == 2);
-
- OP_REQUIRES(_ctx.at(block_size_index).isConstant());
- OP_REQUIRES(_ctx.at(paddings_index).isConstant());
-
- OP_REQUIRES(input_shape.C == output_shape.C);
-}
-
-void OperationValidator::visit(const ir::operation::SpaceToDepth &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
- return;
-
- const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
-
- const auto frontend_layout = _current_op_seq_layout;
- const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
- const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
- const auto block_size = node.param().block_size;
-
- // All assertions as per NNAPI specification.
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4);
- OP_REQUIRES((block_size >= 1) && (input_shape.H % block_size == 0) &&
- (input_shape.W % block_size == 0));
- OP_REQUIRES(input_shape.N == output_shape.N);
- OP_REQUIRES(input_shape.C * block_size * block_size == output_shape.C);
-}
-
-void OperationValidator::visit(const ir::operation::ElementwiseActivation &node)
-{
- checkUnaryOp(node);
-}
-
-void OperationValidator::visit(const ir::operation::ElementwiseBinary &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
-
- OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
- OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(output_index).typeInfo().type());
-}
-
-void OperationValidator::visit(const ir::operation::ElementwiseUnary &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
-
- OP_REQUIRES(node.getInputs().size() == 1);
- OP_REQUIRES(node.getOutputs().size() == 1);
-
- // Check if I/O types match
- if (node.param().op_type == ir::operation::ElementwiseUnary::Type::DEQUANTIZE)
- {
- OP_REQUIRES(_ctx.at(input_index).typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM);
- OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::FLOAT32);
- }
- else if (node.param().op_type == ir::operation::ElementwiseUnary::Type::QUANTIZE)
- {
- OP_REQUIRES(_ctx.at(input_index).typeInfo().type() == ir::DataType::FLOAT32);
- OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM);
- }
- else if (node.param().op_type != ir::operation::ElementwiseUnary::Type::CAST)
- {
- OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
- }
-
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
-}
-
-void OperationValidator::visit(const ir::operation::EmbeddingLookup &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
- const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
-
- const auto &output_obj = _ctx.at(output_index);
- const auto &lookups_obj = _ctx.at(lookups_index);
- const auto &values_obj = _ctx.at(values_index);
-
- // Verify the operand here, not at SimpleEmbeddingLookup::configure(), to avoid ACL occasionally
- // modifying TensorShape (Issue: https://github.sec.samsung.net/STAR/nnfw/issues/729)
- {
- OP_REQUIRES(lookups_obj.typeInfo().type() == ir::DataType::INT32);
-
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto &output_shape = output_obj.shape();
- const auto &lookups_shape = lookups_obj.shape();
- const auto &values_shape = values_obj.shape();
-
- OP_REQUIRES(lookups_shape.rank() == 1);
- OP_REQUIRES(values_shape.rank() >= 2);
-
- // output should be an n-D tensor with the same rank and shape as the values tensor, except for
- // the first dimension, which has the same size as lookups' only dimension.
- OP_REQUIRES(output_shape.rank() == values_shape.rank());
- OP_REQUIRES(output_shape.dim(0) == lookups_shape.dim(0));
- for (int n = 1; n < output_shape.rank(); ++n)
- {
- OP_REQUIRES(output_shape.dim(n) == values_shape.dim(n));
- }
- }
-}
-
-void OperationValidator::visit(const ir::operation::ExpandDims &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
- const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
-
- OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
- OP_REQUIRES(_ctx.at(axis_index).typeInfo().type() == ir::DataType::INT32);
-
- if (_ctx.at(axis_index).info().isDynamic())
- return;
- OP_REQUIRES(_ctx.at(axis_index).shape().rank() <= 1);
-}
-
-void OperationValidator::visit(const ir::operation::HashtableLookup &node)
-{
- const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)};
- const auto hits_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::HITS)};
-
- const auto lookups_index{node.getInputs().at(ir::operation::HashtableLookup::Input::LOOKUPS)};
- const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
- const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
-
- const auto &output_obj = _ctx.at(output_index);
- const auto &hits_obj = _ctx.at(hits_index);
-
- const auto &lookups_obj = _ctx.at(lookups_index);
- const auto &keys_obj = _ctx.at(keys_index);
- const auto &values_obj = _ctx.at(values_index);
-
- OP_REQUIRES(lookups_obj.typeInfo().type() == ir::DataType::INT32);
- OP_REQUIRES(keys_obj.typeInfo().type() == ir::DataType::INT32);
- OP_REQUIRES(hits_obj.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM);
-
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto &output_shape = output_obj.shape();
- const auto &lookups_shape = lookups_obj.shape();
- const auto &keys_shape = keys_obj.shape();
- const auto &values_shape = values_obj.shape();
-
- OP_REQUIRES(values_shape.rank() == output_shape.rank());
- OP_REQUIRES(lookups_shape.rank() == 1);
- OP_REQUIRES(keys_shape.rank() == 1);
- OP_REQUIRES(values_shape.dim(0) == keys_shape.dim(0));
- OP_REQUIRES(lookups_shape.dim(0) == output_shape.dim(0));
-}
-
-void OperationValidator::visit(const ir::operation::TransposeConv &node)
-{
- // param check
- OP_REQUIRES((node.param().padding.type == ir::PaddingType::SAME) ||
- (node.param().padding.type == ir::PaddingType::VALID));
-
- // shape check
- const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
- return;
-
- const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};
- const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};
-
- // Only 4D tensors are supported
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == _ctx.at(ifm_index).shape().rank());
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == _ctx.at(ker_index).shape().rank());
-
- const auto frontend_layout = _current_op_seq_layout;
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
- // The kernel always has IHWO layout on the frontend,
- // so ker_shape is interpreted as follows:
- // I -> N
- // H -> H
- // W -> W
- // O -> C
- const auto ker_shape = _ctx.at(ker_index).shape().asFeature(ir::Layout::NHWC);
-
- OP_REQUIRES(ifm_shape.N == ofm_shape.N);
- OP_REQUIRES(ifm_shape.C == ker_shape.C);
- OP_REQUIRES(ker_shape.N == ofm_shape.C);
-}
-
-void OperationValidator::visit(const ir::operation::Gather &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
- return;
-
- const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
- const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
-
- const auto ifm_shape = _ctx.at(ifm_index).shape();
- const auto indices_shape = _ctx.at(indices_index).shape();
- const auto ofm_shape = _ctx.at(ofm_index).shape();
-
- OP_REQUIRES(ifm_shape.rank() <= 4);
- OP_REQUIRES(indices_shape.rank() <= 3);
- OP_REQUIRES(ofm_shape.rank() <= 4);
-}
-
-void OperationValidator::visit(const ir::operation::DepthToSpace &node)
-{
- // param check
- int32_t block_size = node.param().block_size;
-
- OP_REQUIRES(block_size > 0);
-
- // shape check
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
-
- const auto frontend_layout = _current_op_seq_layout;
- const auto output_shape = _ctx.at(output_index).shape().asFeature(frontend_layout);
- const auto input_shape = _ctx.at(input_index).shape().asFeature(frontend_layout);
-
- OP_REQUIRES(_ctx.at(input_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == 4);
-
- {
- OP_REQUIRES(output_shape.N == input_shape.N);
- OP_REQUIRES(output_shape.H == input_shape.H * block_size);
- OP_REQUIRES(output_shape.W == input_shape.W * block_size);
- OP_REQUIRES(input_shape.C % (block_size * block_size) == 0);
- OP_REQUIRES(output_shape.C == input_shape.C / (block_size * block_size));
- }
-}
-
-void OperationValidator::visit(const ir::operation::Pack &node)
-{
- // param check
- const auto num{node.param().num};
- const auto axis{node.param().axis};
- OP_REQUIRES(num == static_cast<int32_t>(node.getInputs().size()));
-
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- // shape check
- const auto &output_shape = _ctx.at(output_index).shape();
- const auto output_rank = static_cast<int32_t>(output_shape.rank());
-
- const auto input1_index{node.getInputs().at(0)};
- const auto input_shape = _ctx.at(input1_index).shape();
-
- OP_REQUIRES(axis >= -output_rank && axis < output_rank);
- for (const auto &index : node.getInputs())
- {
- OP_REQUIRES(input_shape == _ctx.at(index).shape());
- }
-}
-
-void OperationValidator::visit(const ir::operation::LSTM &node)
-{
- // NOTE This validation is for static RNN (non-dynamic shape), not for dynamic RNN
- // TODO Support dynamic RNN
- const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto scratch_buffer_index{
- node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
- const auto output_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
- const auto cell_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
-
- const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
- const auto input_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)};
- const auto input_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
- const auto input_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
- const auto input_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
- const auto recurrent_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)};
- const auto recurrent_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
- const auto recurrent_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
- const auto recurrent_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
- const auto cell_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)};
- const auto cell_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)};
- const auto cell_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)};
- const auto input_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
- const auto forget_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
- const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
- const auto output_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
- const auto projection_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)};
- const auto projection_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)};
- const auto output_state_in_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
- const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
-
- OP_REQUIRES(_ctx.at(scratch_buffer_index).shape().rank() == 2 &&
- _ctx.at(output_state_out_index).shape().rank() == 2 &&
- _ctx.at(cell_state_out_index).shape().rank() == 2 &&
- _ctx.at(output_index).shape().rank() == 2 &&
- _ctx.at(input_index).shape().rank() == 2 &&
- _ctx.at(input_to_input_weights_index).shape().rank() == 2 &&
- _ctx.at(input_to_forget_weights_index).shape().rank() == 2 &&
- _ctx.at(input_to_cell_weights_index).shape().rank() == 2 &&
- _ctx.at(input_to_output_weights_index).shape().rank() == 2 &&
- _ctx.at(recurrent_to_input_weights_index).shape().rank() == 2 &&
- _ctx.at(recurrent_to_forget_weights_index).shape().rank() == 2 &&
- _ctx.at(recurrent_to_cell_weights_index).shape().rank() == 2 &&
- _ctx.at(recurrent_to_output_weights_index).shape().rank() == 2 &&
- _ctx.at(projection_weights_index).shape().rank() == 2 &&
- _ctx.at(output_state_in_index).shape().rank() == 2 &&
- _ctx.at(cell_state_in_index).shape().rank() == 2);
-
- OP_REQUIRES(_ctx.at(cell_to_input_weights_index).shape().rank() == 1 &&
- _ctx.at(cell_to_forget_weights_index).shape().rank() == 1 &&
- _ctx.at(cell_to_output_weights_index).shape().rank() == 1 &&
- _ctx.at(input_gate_bias_index).shape().rank() == 1 &&
- _ctx.at(forget_gate_bias_index).shape().rank() == 1 &&
- _ctx.at(cell_bias_index).shape().rank() == 1 &&
- _ctx.at(output_gate_bias_index).shape().rank() == 1 &&
- _ctx.at(projection_bias_index).shape().rank() == 1);
-
- // CIFG assertion
- OP_REQUIRES((_ctx.at(input_to_input_weights_index).shape().dim(0) == 0 &&
- _ctx.at(input_to_input_weights_index).shape().dim(1) == 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(0) == 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) == 0 &&
- _ctx.at(input_gate_bias_index).shape().dim(0) == 0 &&
- _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0) ||
- (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(input_to_input_weights_index).shape().dim(1) != 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0 &&
- _ctx.at(input_gate_bias_index).shape().dim(0) != 0));
-
- // Peephole assertion
- OP_REQUIRES((_ctx.at(cell_to_forget_weights_index).shape().dim(0) == 0 &&
- _ctx.at(cell_to_output_weights_index).shape().dim(0) == 0) ||
- (_ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0 &&
- _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0));
-
- bool has_input_to_input_weights = _ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(input_to_input_weights_index).shape().dim(1) != 0;
- bool has_recurrent_to_input_weights =
- _ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
- bool has_input_gate_bias = _ctx.at(input_gate_bias_index).shape().dim(0) != 0;
- bool has_cell_to_input_weights = _ctx.at(cell_to_input_weights_index).shape().dim(0) != 0;
- bool has_cell_to_forget_weights = _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0;
- bool has_cell_to_output_weights = _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;
- bool has_projection_weights = _ctx.at(projection_weights_index).shape().dim(0) != 0 &&
- _ctx.at(projection_weights_index).shape().dim(1) != 0;
- bool has_projection_bias = _ctx.at(projection_bias_index).shape().dim(0);
-
- // NOTE cell_to_input_weights do not exist in non-peephole mode even for a regular (non-CIFG) LSTM.
- // true: no CIFG
- // false: CIFG
- bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
-
- // NOTE cell_to_input_weights do not exist in CIFG mode even with peephole connections.
- // true: peephole
- // false: no peephole
- bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights;
-
- // NOTE The projection weights may have data but the projection bias may not.
- bool has_projection_param = has_projection_weights;
-
- const auto batch_size = _ctx.at(input_index).shape().dim(0);
- OP_REQUIRES(batch_size == _ctx.at(output_state_in_index).shape().dim(0) &&
- batch_size == _ctx.at(cell_state_in_index).shape().dim(0) &&
- batch_size == _ctx.at(scratch_buffer_index).shape().dim(0) &&
- batch_size == _ctx.at(output_state_out_index).shape().dim(0) &&
- batch_size == _ctx.at(cell_state_out_index).shape().dim(0) &&
- batch_size == _ctx.at(output_index).shape().dim(0));
-
- const auto input_size = _ctx.at(input_index).shape().dim(1);
- OP_REQUIRES(input_size == _ctx.at(input_to_forget_weights_index).shape().dim(1) &&
- input_size == _ctx.at(input_to_cell_weights_index).shape().dim(1) &&
- input_size == _ctx.at(input_to_output_weights_index).shape().dim(1));
-
- const auto num_units = _ctx.at(cell_state_out_index).shape().dim(1);
- OP_REQUIRES(num_units == _ctx.at(input_to_forget_weights_index).shape().dim(0) &&
- num_units == _ctx.at(input_to_cell_weights_index).shape().dim(0) &&
- num_units == _ctx.at(input_to_output_weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_to_forget_weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_to_cell_weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_to_output_weights_index).shape().dim(0) &&
- num_units == _ctx.at(forget_gate_bias_index).shape().dim(0) &&
- num_units == _ctx.at(cell_bias_index).shape().dim(0) &&
- num_units == _ctx.at(output_gate_bias_index).shape().dim(0) &&
- num_units == _ctx.at(cell_state_in_index).shape().dim(1) &&
- (((num_units * 3) == _ctx.at(scratch_buffer_index).shape().dim(1)) ||
- ((num_units * 4) == _ctx.at(scratch_buffer_index).shape().dim(1))));
-
- const auto output_size = _ctx.at(output_index).shape().dim(1);
- OP_REQUIRES(output_size == _ctx.at(recurrent_to_forget_weights_index).shape().dim(1) &&
- output_size == _ctx.at(recurrent_to_cell_weights_index).shape().dim(1) &&
- output_size == _ctx.at(recurrent_to_output_weights_index).shape().dim(1) &&
- output_size == _ctx.at(output_state_in_index).shape().dim(1) &&
- output_size == _ctx.at(output_state_out_index).shape().dim(1));
-
- if (has_cifg_param)
- {
- OP_REQUIRES(input_size == _ctx.at(input_to_input_weights_index).shape().dim(1));
- OP_REQUIRES(num_units == _ctx.at(input_to_input_weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_to_input_weights_index).shape().dim(0) &&
- (num_units == _ctx.at(cell_to_input_weights_index).shape().dim(0) ||
- _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0 /* non-peephole */) &&
- num_units == _ctx.at(input_gate_bias_index).shape().dim(0));
- OP_REQUIRES(output_size == _ctx.at(recurrent_to_input_weights_index).shape().dim(1));
- OP_REQUIRES(has_input_to_input_weights && has_recurrent_to_input_weights &&
- has_input_gate_bias);
- if (has_cell_to_input_weights)
- {
- // NOTE The cell_to_input_weights exist only in case of non-CIFG and peephole.
- OP_REQUIRES(has_peephole_param);
- }
- OP_REQUIRES(_ctx.at(scratch_buffer_index).shape().dim(1) == num_units * 4);
- }
- else
- {
- OP_REQUIRES(_ctx.at(scratch_buffer_index).shape().dim(1) == num_units * 3);
- }
-
- if (has_peephole_param)
- {
- OP_REQUIRES(num_units == _ctx.at(cell_to_forget_weights_index).shape().dim(0) &&
- num_units == _ctx.at(cell_to_output_weights_index).shape().dim(0) &&
- (num_units == _ctx.at(cell_to_input_weights_index).shape().dim(0) ||
- _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0 /* CIFG */));
- }
-
- if (has_projection_param)
- {
- OP_REQUIRES(num_units == _ctx.at(projection_weights_index).shape().dim(1));
- OP_REQUIRES(output_size == _ctx.at(projection_weights_index).shape().dim(0));
- if (has_projection_bias)
- {
- OP_REQUIRES(output_size == _ctx.at(projection_bias_index).shape().dim(0));
- }
- }
-}
-
-void OperationValidator::visit(const ir::operation::L2Normalization &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
- return;
-
- const auto ifm_index{node.getInputs().at(ir::operation::L2Normalization::Input::INPUT)};
-
- auto ifm_shape = _ctx.at(ifm_index).shape();
- auto ofm_shape = _ctx.at(ofm_index).shape();
-
- OP_REQUIRES(ifm_shape.rank() == ofm_shape.rank());
-
- for (auto i = 0; i < ifm_shape.rank(); i++)
- {
- OP_REQUIRES(ifm_shape.dim(i) == ofm_shape.dim(i));
- }
-}
-
-void OperationValidator::visit(const ir::operation::Unpack &node)
-{
- const auto num{node.param().num};
- OP_REQUIRES(num == static_cast<int32_t>(node.getOutputs().size()));
- const auto axis{node.param().axis};
-
- const auto output_index{node.getInputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)};
-
- const auto &input_shape = _ctx.at(input_index).shape();
- const auto input_rank = static_cast<int32_t>(input_shape.rank());
-
- OP_REQUIRES(axis >= -input_rank && axis < input_rank);
-}
-
-void OperationValidator::visit(const ir::operation::Pad &node)
-{
- const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
- OP_REQUIRES(_ctx.at(pad_index).typeInfo().type() == ir::DataType::INT32);
-
- const auto output_index{node.getInputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
-
- const auto &pad_shape = _ctx.at(pad_index).shape();
- const auto input_rank = static_cast<int32_t>(_ctx.at(input_index).shape().rank());
-
- OP_REQUIRES(pad_shape.rank() == 2);
- OP_REQUIRES(pad_shape.dim(0) == input_rank);
- OP_REQUIRES(pad_shape.dim(1) == 2);
- OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank());
-}
-
-void OperationValidator::visit(const ir::operation::Select &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- // This validator does not check shape. So checking isDynamic() is skipped.
-
- const auto condition_index{node.getInputs().at(ir::operation::Select::Input::CONDITION)};
- const auto input_true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
- const auto input_false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};
- UNUSED_RELEASE(output_index);
- UNUSED_RELEASE(input_true_index);
- UNUSED_RELEASE(input_false_index);
-
- OP_REQUIRES(_ctx.at(condition_index).typeInfo().type() == ir::DataType::BOOL8);
-}
-
-void OperationValidator::visit(const ir::operation::StridedSlice &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
- const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
- const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
- const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
-
- UNUSED_RELEASE(starts_index);
- UNUSED_RELEASE(ends_index);
- UNUSED_RELEASE(strides_index);
-
- OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
-
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- OP_REQUIRES(_ctx.at(input_index).shape().rank() <= 4);
-}
-
-void OperationValidator::visit(const ir::operation::Split &node)
-{
- const auto input_index{node.getInputs().at(ir::operation::Split::Input::INPUT)};
-
- if (_ctx.at(input_index).info().isDynamic())
- return;
-
- const auto num_splits = node.param().num_splits;
- const auto input_rank = _ctx.at(input_index).shape().rank();
- const auto axis = node.param().axis < 0 ? node.param().axis + input_rank : node.param().axis;
-
- OP_REQUIRES(num_splits > 0 && num_splits <= 0xFFFF);
- OP_REQUIRES(axis >= 0 && axis < input_rank);
- OP_REQUIRES(node.getOutputs().size() == static_cast<uint32_t>(num_splits));
-
- OP_REQUIRES(_ctx.at(input_index).shape().dim(axis) % num_splits == 0);
-}
-
-void OperationValidator::visit(const ir::operation::Shape &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(0)};
- UNUSED_RELEASE(input_index);
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == 1);
-}
-
-void OperationValidator::visit(const ir::operation::ResizeBilinear &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
-
- if (_ctx.at(output_index).info().isDynamic())
- {
- return;
- }
- OP_REQUIRES(_ctx.at(input_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == 4);
-
- auto align_corners = node.param().align_corners;
- auto half_pixel_centers = node.param().half_pixel_centers;
-
- OP_REQUIRES(!align_corners || !half_pixel_centers);
-}
-
-void OperationValidator::visit(const ir::operation::Reverse &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Reverse::Input::INPUT)};
- const auto axis_index{node.getInputs().at(ir::operation::Reverse::Input::AXIS)};
-
- OP_REQUIRES(_ctx.at(axis_index).typeInfo().type() == ir::DataType::INT32);
- OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
-
- if (_ctx.at(output_index).info().isDynamic())
- return;
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
-}
-
-void OperationValidator::visit(const ir::operation::If &)
-{
- // TODO Add to validate with subgraphs
-}
-
-void OperationValidator::visit(const ir::operation::While &node)
-{
- // This validator does not check shape. So checking isDynamic() is skipped.
-
- OP_REQUIRES(node.getInputs().size() == node.getOutputs().size());
- // TODO Add to validate with subgraphs
-}
-
-void OperationValidator::visit(const ir::operation::SquaredDifference &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
-
- // Check for Type equivalence
- OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(lhs_index).typeInfo().type());
- OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
-
- // Check for dimension constraints
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- auto output_shape = _ctx.at(output_index).shape();
- auto lhs_shape = _ctx.at(lhs_index).shape();
- auto rhs_shape = _ctx.at(rhs_index).shape();
- // Check for output rank
- OP_REQUIRES(output_shape.rank() == std::max(lhs_shape.rank(), rhs_shape.rank()));
- auto min_rank = std::min(lhs_shape.rank(), rhs_shape.rank());
-
- for (int idx = 1; idx <= min_rank; idx++)
- {
- int l_idx = lhs_shape.rank() - idx;
- int r_idx = rhs_shape.rank() - idx;
- int out_idx = output_shape.rank() - idx;
-
- OP_REQUIRES((l_idx >= 0) && (r_idx >= 0) && (out_idx >= 0));
-
- auto l_dims = lhs_shape.dim(l_idx);
- auto r_dims = rhs_shape.dim(r_idx);
- auto out_dims = output_shape.dim(out_idx);
-
- OP_REQUIRES(((l_dims == r_dims) && (out_dims == l_dims)) ||
- ((l_dims == 1) && (out_dims == r_dims)) || ((r_dims == 1) && (out_dims == l_dims)));
- }
- auto &tmp_shape = (lhs_shape.rank() > rhs_shape.rank()) ? lhs_shape : rhs_shape;
- for (int idx = min_rank + 1; idx <= output_shape.rank(); idx++)
- {
- int out_idx = output_shape.rank() - idx;
- int tmp_idx = tmp_shape.rank() - idx;
-
- OP_REQUIRES((out_idx >= 0) && (tmp_idx >= 0) &&
- (output_shape.dim(out_idx) == tmp_shape.dim(tmp_idx)));
- }
-}
-void OperationValidator::visit(const ir::operation::Tile &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(0)};
- const auto multiple_index{node.getInputs().at(1)};
-
- OP_REQUIRES(_ctx.at(multiple_index).shape().rank() == 1);
- OP_REQUIRES(_ctx.at(multiple_index).shape().dim(0) == _ctx.at(input_index).shape().rank());
- OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank());
-}
-
-void OperationValidator::visit(const ir::operation::Range &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto start_index{node.getInputs().at(ir::operation::Range::Input::START)};
- const auto limit_index{node.getInputs().at(ir::operation::Range::Input::LIMIT)};
- const auto delta_index{node.getInputs().at(ir::operation::Range::Input::DELTA)};
-
- // Check for dimension constraints
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- OP_REQUIRES(_ctx.at(start_index).shape().rank() == 0);
- OP_REQUIRES(_ctx.at(limit_index).shape().rank() == 0);
- OP_REQUIRES(_ctx.at(delta_index).shape().rank() == 0);
-}
-
-void OperationValidator::visit(const ir::operation::MatrixBandPart &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::MatrixBandPart::Input::INPUT)};
- const auto num_lower_index{
- node.getInputs().at(ir::operation::MatrixBandPart::Input::NUM_LOWER_DIAG)};
- const auto num_upper_index{
- node.getInputs().at(ir::operation::MatrixBandPart::Input::NUM_UPPER_DIAG)};
-
- // Check for dimension constraints
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- OP_REQUIRES(_ctx.at(input_index).shape().rank() >= 2); // input must be a matrix of rank 2 or higher
- OP_REQUIRES(_ctx.at(num_upper_index).shape().rank() == 0); // num_upper must be a scalar
- OP_REQUIRES(_ctx.at(num_lower_index).shape().rank() == 0); // num_lower must be a scalar
-}
-
-void OperationValidator::visit(const ir::operation::LogSoftmax &node)
-{
- VERBOSE(LogSoftmax) << "Configure LOGSOFTMAX operation" << std::endl;
-
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(0)};
-
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
-}
-
-} // namespace compiler
-} // namespace onert
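The removed visitors above all follow the same pattern that the new ShapeValidator below keeps: an OP_REQUIRES macro that throws when a check fails, an early return when the output shape is dynamic, and static shape assertions otherwise. A minimal, self-contained sketch of that pattern, using a hypothetical `Shape` stand-in rather than the runtime's `ir::Shape`, and mirroring the DepthToSpace check:

#include <stdexcept>
#include <string>
#include <vector>

// Same idea as the validators' macro: abort validation and report the failing line.
#define OP_REQUIRES(EXP)                                                                  \
  do                                                                                      \
  {                                                                                       \
    if (!(EXP))                                                                           \
      throw std::runtime_error("validation failed at line " + std::to_string(__LINE__)); \
  } while (0)

struct Shape // hypothetical stand-in for ir::Shape, NHWC dimension order assumed
{
  std::vector<int> dims;
  bool dynamic = false;
  int rank() const { return static_cast<int>(dims.size()); }
  int dim(int i) const { return dims[i]; }
};

// Mirrors the DepthToSpace shape check: skip dynamic outputs, then assert the
// static relations between input and output dimensions.
void validateDepthToSpace(const Shape &input, const Shape &output, int block_size)
{
  OP_REQUIRES(block_size > 0);
  if (output.dynamic)
    return; // dynamic shapes are checked at execution time instead
  OP_REQUIRES(input.rank() == 4 && output.rank() == 4);
  OP_REQUIRES(output.dim(0) == input.dim(0));                             // N
  OP_REQUIRES(output.dim(1) == input.dim(1) * block_size);                // H
  OP_REQUIRES(output.dim(2) == input.dim(2) * block_size);                // W
  OP_REQUIRES(input.dim(3) % (block_size * block_size) == 0);             // C divisible
  OP_REQUIRES(output.dim(3) == input.dim(3) / (block_size * block_size)); // C reduced
}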
diff --git a/runtime/onert/core/src/compiler/ParamChecker.h b/runtime/onert/core/src/compiler/ParamChecker.h
deleted file mode 100644
index 61429d521..000000000
--- a/runtime/onert/core/src/compiler/ParamChecker.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file ParamChecker.h
- * @brief This file contains ParamChecker, which checks whether\n
- * operations' parameters are compilable at the machine-independent phase,\n
- * e.g., whether a parameter is constant
- */
-#ifndef __ONERT_COMPILER_PARAM_CHECKER_H__
-#define __ONERT_COMPILER_PARAM_CHECKER_H__
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-class Graph;
-} // namespace ir
-} // namespace onert
-
-namespace onert
-{
-namespace compiler
-{
-
-class ParamChecker : public ir::OperationVisitor
-{
-public:
- /**
- * @brief Construct a new Param Checker object (deleted)
- */
- ParamChecker(void) = delete;
- /**
- * @brief Construct a new Param Checker object
- * @param[in] model Graph model to check
- */
- ParamChecker(std::shared_ptr<ir::Graph> model) : _model{model} {}
-
-public:
- /**
- * @brief Run parameter analysis
- */
- void operator()();
- /**
- * @brief Return whether the analyzed model has a non-const parameter
- * @return @c true if there is a non-const parameter, otherwise @c false
- */
- bool haveNoneConstParam(void) { return _nonConstParam; }
-
-private:
- const std::shared_ptr<ir::Graph> _model;
- bool _nonConstParam{false};
-};
-
-} // namespace compiler
-} // namespace onert
-
-#endif // __ONERT_COMPILER_PARAM_CHECKER_H__
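The deleted header declares a small visitor-style API: construct the checker with a graph, invoke the functor to run the analysis, then query the single result flag. A hypothetical usage sketch under those declarations (the include paths are assumed, and the header itself is removed by this patch):

#include <memory>

#include "compiler/ParamChecker.h" // removed by this patch; shown only to illustrate the old API
#include "ir/Graph.h"              // assumed location of ir::Graph

// Returns true when any operation parameter in the model is not a compile-time constant.
bool hasNonConstParams(const std::shared_ptr<onert::ir::Graph> &model)
{
  onert::compiler::ParamChecker checker{model}; // construct with the graph to analyze
  checker();                                    // visit every operation in the graph
  return checker.haveNoneConstParam();          // result of the analysis
}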
diff --git a/runtime/onert/core/src/compiler/PermuteFactor.cc b/runtime/onert/core/src/compiler/PermuteFactor.cc
new file mode 100644
index 000000000..f0081a2a4
--- /dev/null
+++ b/runtime/onert/core/src/compiler/PermuteFactor.cc
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "compiler/PermuteFactor.h"
+
+#include <assert.h>
+#include <ostream>
+
+#include "backend/Backend.h"
+
+std::ostream &operator<<(std::ostream &os, const onert::compiler::PermuteFactor &obj)
+{
+ assert(obj.backend() && obj.backend()->config());
+ return os << "(" << obj.backend()->config()->id() << "/" << to_string(obj.layout()) << ")";
+}
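The new operator<< prints a factor as "(<backend id>/<layout>)". A hedged usage sketch, assuming PermuteFactor can be constructed from a backend pointer and a layout (as its backend()/layout() accessors suggest):

#include <iostream>

#include "compiler/PermuteFactor.h"

// Hypothetical debugging helper: `backend` must be non-null and carry a config,
// since the operator<< above dereferences both.
void dumpPermuteFactor(const onert::backend::Backend *backend)
{
  onert::compiler::PermuteFactor factor{backend, onert::ir::Layout::NHWC};
  std::cout << factor << std::endl; // prints e.g. "(cpu/NHWC)"
}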
diff --git a/runtime/onert/core/src/compiler/ShapeValidator.cc b/runtime/onert/core/src/compiler/ShapeValidator.cc
new file mode 100644
index 000000000..5c25ea1d1
--- /dev/null
+++ b/runtime/onert/core/src/compiler/ShapeValidator.cc
@@ -0,0 +1,1082 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ShapeValidator.h"
+
+#include <typeinfo>
+
+#include "ir/Graph.h"
+#include "util/logging.h"
+#include "util/Utils.h"
+
+#define OP_REQUIRES(EXP) \
+ do \
+ { \
+ if (!(EXP)) \
+ throw std::runtime_error("ShapeValidator failed at line " + std::to_string(__LINE__)); \
+ } while (0)
+
+namespace onert
+{
+namespace compiler
+{
+
+ShapeValidator::ShapeValidator(const ir::Graph &graph) : _graph{graph} {}
+
+void ShapeValidator::checkUnaryOp(const ir::Operation &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(0)};
+
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ // Check if I/O shapes match
+ OP_REQUIRES(operands.at(output_index).shape() == operands.at(input_index).shape());
+}
+
+void ShapeValidator::operator()()
+{
+ _graph.operations().iterate(
+ [&](const ir::OperationIndex &, const ir::IOperation &node) { node.accept(*this); });
+}
+
+void ShapeValidator::visit(const ir::operation::BatchMatMul &node)
+{
+ const auto &operands = _graph.operands();
+ const auto lhs_index(node.getInputs().at(ir::operation::BatchMatMul::Input::LHS));
+ const auto rhs_index(node.getInputs().at(ir::operation::BatchMatMul::Input::RHS));
+ const auto out_index{node.getOutputs().at(0)};
+
+ if (operands.at(out_index).info().isDynamic())
+ return;
+
+ OP_REQUIRES(operands.at(lhs_index).shape().rank() <= 4);
+ OP_REQUIRES(operands.at(rhs_index).shape().rank() <= 4);
+ OP_REQUIRES(operands.at(lhs_index).shape().rank() >= 2);
+ OP_REQUIRES(operands.at(rhs_index).shape().rank() >= 2);
+}
+
+void ShapeValidator::visit(const ir::operation::BatchToSpaceND &node)
+{
+ const auto &operands = _graph.operands();
+ const auto ofm_index{node.getOutputs().at(0)};
+ if (operands.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)};
+ const auto block_size_index{
+ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
+
+ const auto frontend_layout = _graph.layout();
+ const auto input_shape = operands.at(ifm_index).shape().asFeature(frontend_layout);
+ const auto output_shape = operands.at(ofm_index).shape().asFeature(frontend_layout);
+
+ // All requirements as per the NNAPI specification.
+ OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(block_size_index).shape().rank() == 1);
+
+ OP_REQUIRES(operands.at(block_size_index).shape().dim(0) == 2);
+
+ if (node.getInputs().size() != 2)
+ {
+ const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)};
+ OP_REQUIRES(operands.at(crops_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(crops_index).shape().dim(0) ==
+ (operands.at(ifm_index).shape().rank() - 2));
+ OP_REQUIRES(operands.at(crops_index).shape().dim(1) == 2);
+ }
+
+ OP_REQUIRES(input_shape.C == output_shape.C);
+}
+
+void ShapeValidator::visit(const ir::operation::BCQFullyConnected &node)
+{
+ const auto &operands = _graph.operands();
+ const auto ofm_index{node.getOutputs().at(0)};
+ if (operands.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto ifm_index{node.getInputs().at(ir::operation::BCQFullyConnected::Input::INPUT)};
+ const auto weight_scales_index{
+ node.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_SCALES)};
+ const auto weight_binary_index{
+ node.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_BINARY)};
+ const auto weight_cluster_index{
+ node.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)};
+ // const auto bias_index{node.getInputs().at(ir::operation::BCQFullyConnected::Input::BIAS)};
+
+ OP_REQUIRES(operands.at(ifm_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(weight_scales_index).shape().rank() == 1);
+ OP_REQUIRES(operands.at(weight_binary_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(weight_cluster_index).shape().rank() == 2);
+
+ OP_REQUIRES(operands.at(ifm_index).shape().dim(1) == operands.at(ofm_index).shape().dim(1));
+
+ OP_REQUIRES(operands.at(weight_cluster_index).shape().dim(0) > 0);
+ OP_REQUIRES(operands.at(weight_cluster_index).shape().dim(1) == 2);
+
+ // more shape validation will be done inside kernel.
+
+ // TODO Check bias dimension (can be null tensor)
+}
+
+void ShapeValidator::visit(const ir::operation::BCQGather &node)
+{
+ const auto &operands = _graph.operands();
+ const auto ofm_index{node.getOutputs().at(0)};
+ if (operands.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto indices_index{node.getInputs().at(ir::operation::BCQGather::Input::INDICES)};
+ const auto input_binary_index{node.getInputs().at(ir::operation::BCQGather::Input::INPUT_BINARY)};
+ const auto input_scales_index{node.getInputs().at(ir::operation::BCQGather::Input::INPUT_SCALES)};
+ const auto input_clusters_index{
+ node.getInputs().at(ir::operation::BCQGather::Input::INPUT_CLUSTERS)};
+
+ OP_REQUIRES(operands.at(indices_index).shape().rank() <=
+ 2); // TODO: support ranks up to 4 or higher
+ OP_REQUIRES(operands.at(input_binary_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(input_scales_index).shape().rank() == 1);
+ OP_REQUIRES(operands.at(input_clusters_index).shape().rank() == 2);
+
+ OP_REQUIRES(operands.at(input_clusters_index).shape().dim(0) > 0);
+ OP_REQUIRES(operands.at(input_clusters_index).shape().dim(1) == 2);
+
+ // more shape validation will be done inside kernel.
+}
+
+void ShapeValidator::visit(const ir::operation::Comparison &)
+{
+ // TODO Shape validation of comparison
+}
+
+void ShapeValidator::visit(const ir::operation::Softmax &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(0)};
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ const auto input_index{node.getInputs().at(0)};
+
+ OP_REQUIRES(operands.at(output_index).shape().rank() == operands.at(input_index).shape().rank());
+}
+
+void ShapeValidator::visit(const ir::operation::InstanceNorm &node)
+{
+ const auto &operands = _graph.operands();
+ const auto ofm_index{node.getOutputs().at(0)};
+ if (operands.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto ifm_index{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)};
+ const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
+ const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
+
+ OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(ifm_index).shape() == operands.at(ofm_index).shape());
+ OP_REQUIRES(operands.at(gamma_index).shape().rank() == 1);
+ OP_REQUIRES(operands.at(beta_index).shape().rank() == 1);
+}
+
+void ShapeValidator::visit(const ir::operation::Pool2D &node)
+{
+ const auto &operands = _graph.operands();
+ const auto ofm_index{node.getOutputs().at(0)};
+ if (operands.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};
+
+ OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4);
+}
+
+void ShapeValidator::visit(const ir::operation::Permute &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(0)};
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ const auto input_index{node.getInputs().at(0)};
+
+ OP_REQUIRES(operands.at(output_index).shape().rank() == operands.at(input_index).shape().rank());
+}
+
+void ShapeValidator::visit(const ir::operation::Reduce &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(0)};
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ const auto &input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
+ const auto &input_shape = operands.at(input_index).shape();
+ const auto &output_shape = operands.at(output_index).shape();
+
+ OP_REQUIRES(input_shape.rank() <= 4);
+ OP_REQUIRES(output_shape.rank() <= input_shape.rank());
+
+ // NOTE For 4-dimensional input, if the ranks of input and output differ, this runtime only
+ // supports reducing height and width, or reducing depth.
+ // TODO We have to support all reduction cases for dimensions up to 4.
+ // For correct permuting, the output shape has to keep the reduced dimensions at the same
+ // positions as in the input, but the positions of matching dimensions may differ between the
+ // input and the output. For example, input shape {2,3,4,5} can be reduced to output shape {3,5}.
+ // The position-preserving output shape would be {1,3,1,5}, but the actual output shape may be
+ // {3,5}; naively extending it to 4 dimensions would yield {1,1,3,5} instead.
+ // Even if the output shape were changed to {1,3,1,5}, there is another problem: the output
+ // tensor fed to the next operation would then have shape {1,3,1,5} after this operation, even
+ // when the next operation does not expect it.
+ if (input_shape.rank() == 4 && input_shape.rank() != output_shape.rank())
+ {
+ if (output_shape.rank() == 2)
+ {
+ // Reducing HW
+ OP_REQUIRES(input_shape.dim(0) == output_shape.dim(0) &&
+ input_shape.dim(3) == output_shape.dim(1));
+ }
+ else if (output_shape.rank() == 3)
+ {
+ // Reducing C or
+ // (Reducing H and C(input and output) == 1) or (Reducing W and C(input and output) == 1)
+ OP_REQUIRES(
+ (input_shape.dim(0) == output_shape.dim(0) && input_shape.dim(1) == output_shape.dim(1) &&
+ input_shape.dim(2) == output_shape.dim(2)) ||
+ (input_shape.dim(0) == output_shape.dim(0) &&
+ (input_shape.dim(1) == output_shape.dim(1) || input_shape.dim(2) == output_shape.dim(1)) &&
+ input_shape.dim(3) == 1 && output_shape.dim(2) == 1));
+ }
+ }
+}
+
+void ShapeValidator::visit(const ir::operation::Transpose &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(0)};
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
+ const auto perm_index{node.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};
+
+ const auto &output_shape = operands.at(output_index).shape();
+ const auto &input_shape = operands.at(input_index).shape();
+
+ OP_REQUIRES(operands.at(perm_index).shape().num_elements() == 0 ||
+ input_shape.rank() ==
+ static_cast<int>(operands.at(perm_index).shape().num_elements()));
+ OP_REQUIRES(input_shape.rank() == output_shape.rank());
+}
+
+void ShapeValidator::visit(const ir::operation::RNN &node)
+{
+ // NOTE This validation is for static RNN (non-dynamic shape), not for dynamic RNN
+ // TODO Support dynamic RNN
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)};
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ const auto hidden_state_out_index{
+ node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};
+
+ const auto input_index{node.getInputs().at(ir::operation::RNN::Input::INPUT)};
+ const auto weights_index{node.getInputs().at(ir::operation::RNN::Input::WEIGHTS)};
+ const auto recurrent_weights_index{
+ node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
+ const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)};
+ const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)};
+
+ const auto batch_size = operands.at(output_index).shape().dim(0);
+ const auto num_units = operands.at(output_index).shape().dim(1);
+
+ OP_REQUIRES(operands.at(output_index).shape().rank() == 2 &&
+ operands.at(hidden_state_out_index).shape().rank() == 2 &&
+ operands.at(input_index).shape().rank() == 2 &&
+ operands.at(weights_index).shape().rank() == 2 &&
+ operands.at(recurrent_weights_index).shape().rank() == 2 &&
+ operands.at(hidden_state_in_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(bias_index).shape().rank() == 1);
+
+ OP_REQUIRES(batch_size == operands.at(input_index).shape().dim(0) &&
+ batch_size == operands.at(hidden_state_in_index).shape().dim(0) &&
+ batch_size == operands.at(hidden_state_out_index).shape().dim(0));
+ OP_REQUIRES(operands.at(input_index).shape().dim(1) == operands.at(weights_index).shape().dim(1));
+
+ OP_REQUIRES(num_units == operands.at(weights_index).shape().dim(0) &&
+ num_units == operands.at(recurrent_weights_index).shape().dim(0) &&
+ num_units == operands.at(bias_index).shape().dim(0));
+ OP_REQUIRES(num_units == operands.at(output_index).shape().dim(1) &&
+ num_units == operands.at(recurrent_weights_index).shape().dim(1) &&
+ num_units == operands.at(hidden_state_in_index).shape().dim(1) &&
+ num_units == operands.at(hidden_state_out_index).shape().dim(1));
+}
+
+void ShapeValidator::visit(const ir::operation::SpaceToBatchND &node)
+{
+ const auto &operands = _graph.operands();
+ const auto ofm_index{node.getOutputs().at(0)};
+ if (operands.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
+ const auto block_size_index{
+ node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
+ const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
+
+ const auto frontend_layout = _graph.layout();
+ const auto input_shape = operands.at(ifm_index).shape().asFeature(frontend_layout);
+ const auto output_shape = operands.at(ofm_index).shape().asFeature(frontend_layout);
+
+ // All requirements as per the NNAPI specification.
+ OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(block_size_index).shape().rank() == 1);
+ OP_REQUIRES(operands.at(paddings_index).shape().rank() == 2);
+
+ OP_REQUIRES(operands.at(block_size_index).shape().dim(0) == 2);
+ OP_REQUIRES(operands.at(paddings_index).shape().dim(0) == 2);
+ OP_REQUIRES(operands.at(paddings_index).shape().dim(1) == 2);
+
+ OP_REQUIRES(input_shape.C == output_shape.C);
+}
+
+void ShapeValidator::visit(const ir::operation::SpaceToDepth &node)
+{
+ const auto &operands = _graph.operands();
+ const auto ofm_index{node.getOutputs().at(0)};
+ if (operands.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
+
+ const auto frontend_layout = _graph.layout();
+ const auto input_shape = operands.at(ifm_index).shape().asFeature(frontend_layout);
+ const auto output_shape = operands.at(ofm_index).shape().asFeature(frontend_layout);
+ const auto block_size = node.param().block_size;
+
+ // All assertions as per NNAPI specification.
+ OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == 4);
+ OP_REQUIRES((input_shape.H % block_size == 0) && (input_shape.W % block_size == 0));
+ OP_REQUIRES(input_shape.N == output_shape.N);
+ OP_REQUIRES(input_shape.C * block_size * block_size == output_shape.C);
+}
+
+void ShapeValidator::visit(const ir::operation::ElementwiseActivation &node) { checkUnaryOp(node); }
+
+void ShapeValidator::visit(const ir::operation::ElementwiseBinary &)
+{
+ // TODO Shape validation of ElementwiseBinary
+}
+
+void ShapeValidator::visit(const ir::operation::ElementwiseUnary &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
+
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ OP_REQUIRES(operands.at(output_index).shape() == operands.at(input_index).shape());
+}
+
+void ShapeValidator::visit(const ir::operation::EmbeddingLookup &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(0)};
+ const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
+ const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
+
+ const auto &output_obj = operands.at(output_index);
+ const auto &lookups_obj = operands.at(lookups_index);
+ const auto &values_obj = operands.at(values_index);
+
+ // Verify the operand here, not at SimpleEmbeddingLookup::configure(), to avoid ACL occasionally
+ // modifying TensorShape (Issue: https://github.sec.samsung.net/STAR/nnfw/issues/729)
+ {
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ const auto &output_shape = output_obj.shape();
+ const auto &lookups_shape = lookups_obj.shape();
+ const auto &values_shape = values_obj.shape();
+
+ OP_REQUIRES(lookups_shape.rank() == 1);
+ OP_REQUIRES(values_shape.rank() >= 2);
+
+ // output should be an n-D tensor with the same rank and shape as the values tensor, except for
+ // the first dimension, which has the same size as lookups' only dimension.
+ OP_REQUIRES(output_shape.rank() == values_shape.rank());
+ OP_REQUIRES(output_shape.dim(0) == lookups_shape.dim(0));
+ for (int n = 1; n < output_shape.rank(); ++n)
+ {
+ OP_REQUIRES(output_shape.dim(n) == values_shape.dim(n));
+ }
+ }
+}
+
+void ShapeValidator::visit(const ir::operation::ExpandDims &node)
+{
+ const auto &operands = _graph.operands();
+ const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
+
+ if (operands.at(axis_index).info().isDynamic())
+ return;
+ OP_REQUIRES(operands.at(axis_index).shape().rank() <= 1);
+}
+
+void ShapeValidator::visit(const ir::operation::HashtableLookup &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)};
+ const auto lookups_index{node.getInputs().at(ir::operation::HashtableLookup::Input::LOOKUPS)};
+ const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
+ const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
+
+ const auto &output_obj = operands.at(output_index);
+ const auto &lookups_obj = operands.at(lookups_index);
+ const auto &keys_obj = operands.at(keys_index);
+ const auto &values_obj = operands.at(values_index);
+
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ const auto &output_shape = output_obj.shape();
+ const auto &lookups_shape = lookups_obj.shape();
+ const auto &keys_shape = keys_obj.shape();
+ const auto &values_shape = values_obj.shape();
+
+ OP_REQUIRES(values_shape.rank() == output_shape.rank());
+ OP_REQUIRES(lookups_shape.rank() == 1);
+ OP_REQUIRES(keys_shape.rank() == 1);
+ OP_REQUIRES(values_shape.dim(0) == keys_shape.dim(0));
+ OP_REQUIRES(lookups_shape.dim(0) == output_shape.dim(0));
+}
+
+void ShapeValidator::visit(const ir::operation::TransposeConv &node)
+{
+ // shape check
+ const auto &operands = _graph.operands();
+ const auto ofm_index{node.getOutputs().at(0)};
+
+ if (operands.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};
+ const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};
+
+ // Only 4D tensors are supported
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == operands.at(ifm_index).shape().rank());
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == operands.at(ker_index).shape().rank());
+
+ const auto frontend_layout = _graph.layout();
+ const auto ofm_shape = operands.at(ofm_index).shape().asFeature(frontend_layout);
+ const auto ifm_shape = operands.at(ifm_index).shape().asFeature(frontend_layout);
+ // The kernel always has IHWO layout on the frontend,
+ // so ker_shape is interpreted as follows:
+ // I -> N
+ // H -> H
+ // W -> W
+ // O -> C
+ const auto ker_shape = operands.at(ker_index).shape().asFeature(ir::Layout::NHWC);
+
+ OP_REQUIRES(ifm_shape.N == ofm_shape.N);
+ OP_REQUIRES(ifm_shape.C == ker_shape.C);
+ OP_REQUIRES(ker_shape.N == ofm_shape.C);
+}
+
+void ShapeValidator::visit(const ir::operation::Gather &node)
+{
+ const auto &operands = _graph.operands();
+ const auto ofm_index{node.getOutputs().at(0)};
+ if (operands.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
+ const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
+
+ const auto &ifm_shape = operands.at(ifm_index).shape();
+ const auto &indices_shape = operands.at(indices_index).shape();
+ const auto &ofm_shape = operands.at(ofm_index).shape();
+
+ OP_REQUIRES(ifm_shape.rank() <= 4);
+ OP_REQUIRES(indices_shape.rank() <= 3);
+ OP_REQUIRES(ofm_shape.rank() <= 4);
+}
+
+void ShapeValidator::visit(const ir::operation::DepthToSpace &node)
+{
+ const auto &operands = _graph.operands();
+ int32_t block_size = node.param().block_size;
+
+ // shape check
+ const auto output_index{node.getOutputs().at(0)};
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
+
+ const auto frontend_layout = _graph.layout();
+ const auto output_shape = operands.at(output_index).shape().asFeature(frontend_layout);
+ const auto input_shape = operands.at(input_index).shape().asFeature(frontend_layout);
+
+ OP_REQUIRES(operands.at(input_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(output_index).shape().rank() == 4);
+
+ {
+ OP_REQUIRES(output_shape.N == input_shape.N);
+ OP_REQUIRES(output_shape.H == input_shape.H * block_size);
+ OP_REQUIRES(output_shape.W == input_shape.W * block_size);
+ OP_REQUIRES(input_shape.C % (block_size * block_size) == 0);
+ OP_REQUIRES(output_shape.C == input_shape.C / (block_size * block_size));
+ }
+}
+
+void ShapeValidator::visit(const ir::operation::Pack &node)
+{
+ const auto &operands = _graph.operands();
+ const auto axis{node.param().axis};
+ const auto output_index{node.getOutputs().at(0)};
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ // shape check
+ const auto &output_shape = operands.at(output_index).shape();
+ const auto output_rank = static_cast<int32_t>(output_shape.rank());
+
+ const auto input1_index{node.getInputs().at(0)};
+ const auto &input_shape = operands.at(input1_index).shape();
+
+ OP_REQUIRES(axis >= -output_rank && axis < output_rank);
+ for (const auto &index : node.getInputs())
+ {
+ OP_REQUIRES(input_shape == operands.at(index).shape());
+ }
+}
+
+void ShapeValidator::visit(const ir::operation::LSTM &node)
+{
+ // NOTE This validation is for static RNN (non-dynamic shape), not for dynamic RNN
+ // TODO Support dynamic RNN
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ const auto scratch_buffer_index{
+ node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)}; // Optional
+ const auto output_state_out_index{
+ node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)}; // Optional
+ const auto cell_state_out_index{
+ node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)}; // Optional
+
+ const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
+ const auto input_to_input_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // Optional
+ const auto input_to_forget_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
+ const auto input_to_cell_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
+ const auto input_to_output_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
+ const auto recurrent_to_input_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // Optional
+ const auto recurrent_to_forget_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
+ const auto recurrent_to_cell_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
+ const auto recurrent_to_output_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
+ const auto cell_to_input_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // Optional
+ const auto cell_to_forget_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // Optional
+ const auto cell_to_output_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // Optional
+ const auto input_gate_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)}; // Optional
+ const auto forget_gate_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
+ const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
+ const auto output_gate_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
+ const auto projection_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // Optional
+ const auto projection_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // Optional
+ const auto output_state_in_index{
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
+ const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
+
+ OP_REQUIRES(operands.at(input_index).shape().rank() == operands.at(output_index).shape().rank());
+ for (int i = 0; i < operands.at(input_index).shape().rank() - 1; ++i)
+ {
+ OP_REQUIRES(operands.at(input_index).shape().dim(i) ==
+ operands.at(output_index).shape().dim(i));
+ }
+ OP_REQUIRES((operands.at(output_index).shape().rank() == 2 ||
+ operands.at(output_index).shape().rank() == 3) &&
+ (operands.at(input_index).shape().rank() == 2 ||
+ operands.at(input_index).shape().rank() == 3) &&
+ (!operands.exist(input_to_input_weights_index) ||
+ operands.at(input_to_input_weights_index).shape().rank() == 2) &&
+ operands.at(input_to_forget_weights_index).shape().rank() == 2 &&
+ operands.at(input_to_cell_weights_index).shape().rank() == 2 &&
+ operands.at(input_to_output_weights_index).shape().rank() == 2 &&
+ (!operands.exist(recurrent_to_input_weights_index) ||
+ operands.at(recurrent_to_input_weights_index).shape().rank() == 2) &&
+ operands.at(recurrent_to_forget_weights_index).shape().rank() == 2 &&
+ operands.at(recurrent_to_cell_weights_index).shape().rank() == 2 &&
+ operands.at(recurrent_to_output_weights_index).shape().rank() == 2 &&
+ (!operands.exist(projection_weights_index) ||
+ operands.at(projection_weights_index).shape().rank() == 2) &&
+ operands.at(output_state_in_index).shape().rank() == 2 &&
+ operands.at(cell_state_in_index).shape().rank() == 2);
+
+ OP_REQUIRES((!operands.exist(cell_to_input_weights_index) ||
+ operands.at(cell_to_input_weights_index).shape().rank() == 1) &&
+ (!operands.exist(cell_to_forget_weights_index) ||
+ operands.at(cell_to_forget_weights_index).shape().rank() == 1) &&
+ (!operands.exist(cell_to_output_weights_index) ||
+ operands.at(cell_to_output_weights_index).shape().rank() == 1) &&
+ (!operands.exist(input_gate_bias_index) ||
+ operands.at(input_gate_bias_index).shape().rank() == 1) &&
+ operands.at(forget_gate_bias_index).shape().rank() == 1 &&
+ operands.at(cell_bias_index).shape().rank() == 1 &&
+ operands.at(output_gate_bias_index).shape().rank() == 1 &&
+ (!operands.exist(projection_bias_index) ||
+ operands.at(projection_bias_index).shape().rank() == 1));
+
+ // CIFG assertion
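+ // CIFG (Coupled Input and Forget Gate) drops the separate input gate, so the input-gate operands
+ // (input_to_input_weights, recurrent_to_input_weights, input_gate_bias) must be either all absent/zero-sized or all present.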
+ OP_REQUIRES(((!operands.exist(input_to_input_weights_index) ||
+ (operands.at(input_to_input_weights_index).shape().dim(0) == 0 &&
+ operands.at(input_to_input_weights_index).shape().dim(1) == 0)) &&
+ (!operands.exist(recurrent_to_input_weights_index) ||
+ (operands.at(recurrent_to_input_weights_index).shape().dim(0) == 0 &&
+ operands.at(recurrent_to_input_weights_index).shape().dim(1) == 0)) &&
+ (!operands.exist(input_gate_bias_index) ||
+ operands.at(input_gate_bias_index).shape().dim(0) == 0) &&
+ (!operands.exist(cell_to_input_weights_index) ||
+ operands.at(cell_to_input_weights_index).shape().dim(0) == 0)) ||
+ ((operands.exist(input_to_input_weights_index) &&
+ (operands.at(input_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(input_to_input_weights_index).shape().dim(1) != 0)) &&
+ (operands.exist(recurrent_to_input_weights_index) &&
+ (operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0)) &&
+ (operands.exist(input_gate_bias_index) &&
+ operands.at(input_gate_bias_index).shape().dim(0) != 0)));
+
+ // Peephole assertion
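+ // cell_to_forget_weights and cell_to_output_weights must be both present or both absent/zero-sized.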
+ OP_REQUIRES(((!operands.exist(cell_to_forget_weights_index) ||
+ operands.at(cell_to_forget_weights_index).shape().dim(0) == 0) &&
+ (!operands.exist(cell_to_output_weights_index) ||
+ operands.at(cell_to_output_weights_index).shape().dim(0) == 0)) ||
+ ((operands.exist(cell_to_forget_weights_index) &&
+ operands.at(cell_to_forget_weights_index).shape().dim(0) != 0) &&
+ (operands.exist(cell_to_output_weights_index) &&
+ operands.at(cell_to_output_weights_index).shape().dim(0) != 0)));
+
+ bool has_input_to_input_weights =
+ operands.exist(input_to_input_weights_index) &&
+ (operands.at(input_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(input_to_input_weights_index).shape().dim(1) != 0);
+ bool has_recurrent_to_input_weights =
+ operands.exist(recurrent_to_input_weights_index) &&
+ (operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0);
+ bool has_input_gate_bias =
+ operands.exist(input_gate_bias_index) && operands.at(input_gate_bias_index).shape().dim(0) != 0;
+ bool has_cell_to_input_weights = operands.exist(cell_to_input_weights_index) &&
+ operands.at(cell_to_input_weights_index).shape().dim(0) != 0;
+ bool has_cell_to_forget_weights = operands.exist(cell_to_forget_weights_index) &&
+ operands.at(cell_to_forget_weights_index).shape().dim(0) != 0;
+ bool has_cell_to_output_weights = operands.exist(cell_to_output_weights_index) &&
+ operands.at(cell_to_output_weights_index).shape().dim(0) != 0;
+ bool has_projection_weights = operands.exist(projection_weights_index) &&
+ (operands.at(projection_weights_index).shape().dim(0) != 0 &&
+ operands.at(projection_weights_index).shape().dim(1) != 0);
+ bool has_projection_bias =
+ operands.exist(projection_bias_index) && operands.at(projection_bias_index).shape().dim(0) != 0;
+
+ // NOTE cell_to_input_weights may be absent when there is no peephole connection, even in a regular (non-CIFG) LSTM.
+ // true: no CIFG
+ // false: CIFG
+ bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
+
+ // NOTE cell_to_input_weights do not exist in a regular CIFG LSTM, even with peephole connections.
+ // true: peephole
+ // false: no peephole
+ bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights;
+
+ // NOTE The projection weights may be present while the projection bias is not.
+ bool has_projection_param = has_projection_weights;
+
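+ // A rank-3 time-major input is laid out as [time, batch, input_size], so the batch is dim(1); otherwise it is dim(0).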
+ const auto batch_size = (operands.at(input_index).shape().rank() == 3 && node.param().time_major)
+ ? operands.at(input_index).shape().dim(1)
+ : operands.at(input_index).shape().dim(0);
+ OP_REQUIRES(batch_size == operands.at(output_state_in_index).shape().dim(0) &&
+ batch_size == operands.at(cell_state_in_index).shape().dim(0));
+
+ const auto input_size =
+ operands.at(input_index).shape().dim(operands.at(input_index).shape().rank() - 1);
+ OP_REQUIRES(input_size == operands.at(input_to_forget_weights_index).shape().dim(1) &&
+ input_size == operands.at(input_to_cell_weights_index).shape().dim(1) &&
+ input_size == operands.at(input_to_output_weights_index).shape().dim(1));
+
+ const auto num_units = operands.at(input_to_output_weights_index).shape().dim(0);
+ OP_REQUIRES(num_units == operands.at(input_to_cell_weights_index).shape().dim(0) &&
+ num_units == operands.at(input_to_output_weights_index).shape().dim(0) &&
+ num_units == operands.at(recurrent_to_forget_weights_index).shape().dim(0) &&
+ num_units == operands.at(recurrent_to_cell_weights_index).shape().dim(0) &&
+ num_units == operands.at(recurrent_to_output_weights_index).shape().dim(0) &&
+ num_units == operands.at(forget_gate_bias_index).shape().dim(0) &&
+ num_units == operands.at(cell_bias_index).shape().dim(0) &&
+ num_units == operands.at(output_gate_bias_index).shape().dim(0) &&
+ num_units == operands.at(cell_state_in_index).shape().dim(1));
+
+ const auto output_size =
+ operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 1);
+ OP_REQUIRES(output_size == operands.at(recurrent_to_forget_weights_index).shape().dim(1) &&
+ output_size == operands.at(recurrent_to_cell_weights_index).shape().dim(1) &&
+ output_size == operands.at(recurrent_to_output_weights_index).shape().dim(1) &&
+ output_size == operands.at(output_state_in_index).shape().dim(1));
+
+ if (has_cifg_param)
+ {
+ OP_REQUIRES(input_size == operands.at(input_to_input_weights_index).shape().dim(1));
+ OP_REQUIRES(
+ num_units == operands.at(input_to_input_weights_index).shape().dim(0) &&
+ num_units == operands.at(recurrent_to_input_weights_index).shape().dim(0) &&
+ ((operands.exist(cell_to_input_weights_index) &&
+ num_units == operands.at(cell_to_input_weights_index).shape().dim(0)) ||
+ (!operands.exist(cell_to_input_weights_index) ||
+ operands.at(cell_to_input_weights_index).shape().dim(0) == 0) /* non-peephole */) &&
+ num_units == operands.at(input_gate_bias_index).shape().dim(0));
+ OP_REQUIRES(output_size == operands.at(recurrent_to_input_weights_index).shape().dim(1));
+ OP_REQUIRES(has_input_to_input_weights && has_recurrent_to_input_weights &&
+ has_input_gate_bias);
+ if (has_cell_to_input_weights)
+ {
+ // NOTE cell_to_input_weights exist only in the non-CIFG case with peephole connections.
+ OP_REQUIRES(has_peephole_param);
+ }
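+ // The scratch buffer holds pre-activation values for the gates: 4 gates without CIFG, 3 with CIFG.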
+ if (operands.exist(scratch_buffer_index))
+ OP_REQUIRES(operands.at(scratch_buffer_index).shape().dim(1) == num_units * 4);
+ }
+ else
+ {
+ if (operands.exist(scratch_buffer_index))
+ OP_REQUIRES(operands.at(scratch_buffer_index).shape().dim(1) == num_units * 3);
+ }
+
+ if (has_peephole_param)
+ {
+ OP_REQUIRES(num_units == operands.at(cell_to_forget_weights_index).shape().dim(0) &&
+ num_units == operands.at(cell_to_output_weights_index).shape().dim(0) &&
+ (num_units == operands.at(cell_to_input_weights_index).shape().dim(0) ||
+ operands.at(cell_to_input_weights_index).shape().dim(0) == 0 /* CIFG */));
+ }
+
+ if (has_projection_param)
+ {
+ OP_REQUIRES(num_units == operands.at(projection_weights_index).shape().dim(1));
+ OP_REQUIRES(output_size == operands.at(projection_weights_index).shape().dim(0));
+ if (has_projection_bias)
+ {
+ OP_REQUIRES(output_size == operands.at(projection_bias_index).shape().dim(0));
+ }
+ }
+
+ if (operands.exist(scratch_buffer_index))
+ {
+ OP_REQUIRES(operands.at(scratch_buffer_index).shape().rank() == 2);
+ OP_REQUIRES(batch_size == operands.at(scratch_buffer_index).shape().dim(0));
+ }
+
+ if (operands.exist(output_state_out_index))
+ {
+ OP_REQUIRES(operands.at(output_state_out_index).shape().rank() == 2);
+ OP_REQUIRES(batch_size == operands.at(output_state_out_index).shape().dim(0));
+ OP_REQUIRES(output_size == operands.at(output_state_out_index).shape().dim(1));
+ }
+
+ if (operands.exist(cell_state_out_index))
+ {
+ OP_REQUIRES(operands.at(cell_state_out_index).shape().rank() == 2);
+ OP_REQUIRES(batch_size == operands.at(cell_state_out_index).shape().dim(0));
+ OP_REQUIRES(num_units == operands.at(cell_state_out_index).shape().dim(1));
+ }
+}
+
+void ShapeValidator::visit(const ir::operation::L2Normalization &node)
+{
+ const auto &operands = _graph.operands();
+ const auto ofm_index{node.getOutputs().at(0)};
+ if (operands.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto ifm_index{node.getInputs().at(ir::operation::L2Normalization::Input::INPUT)};
+
+ auto ifm_shape = operands.at(ifm_index).shape();
+ auto ofm_shape = operands.at(ofm_index).shape();
+
+ OP_REQUIRES(ifm_shape.rank() == ofm_shape.rank());
+
+ for (auto i = 0; i < ifm_shape.rank(); i++)
+ {
+ OP_REQUIRES(ifm_shape.dim(i) == ofm_shape.dim(i));
+ }
+}
+
+void ShapeValidator::visit(const ir::operation::Unpack &node)
+{
+ const auto &operands = _graph.operands();
+ const auto axis{node.param().axis};
+ const auto output_index{node.getInputs().at(0)};
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)};
+
+ const auto &input_shape = operands.at(input_index).shape();
+ const auto input_rank = static_cast<int32_t>(input_shape.rank());
+
+ OP_REQUIRES(axis >= -input_rank && axis < input_rank);
+}
+
+void ShapeValidator::visit(const ir::operation::Pad &node)
+{
+ const auto &operands = _graph.operands();
+ const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
+ OP_REQUIRES(operands.at(pad_index).typeInfo().type() == ir::DataType::INT32);
+
+ const auto output_index{node.getInputs().at(0)};
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
+
+ const auto &pad_shape = operands.at(pad_index).shape();
+ const auto input_rank = static_cast<int32_t>(operands.at(input_index).shape().rank());
+
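+ // PAD is an [input_rank, 2] tensor of (before, after) padding amounts per input dimension.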
+ OP_REQUIRES(pad_shape.rank() == 2);
+ OP_REQUIRES(pad_shape.dim(0) == input_rank);
+ OP_REQUIRES(pad_shape.dim(1) == 2);
+ OP_REQUIRES(operands.at(input_index).shape().rank() == operands.at(output_index).shape().rank());
+}
+
+void ShapeValidator::visit(const ir::operation::Select &)
+{
+ // TODO Shape validation of select
+}
+
+void ShapeValidator::visit(const ir::operation::StridedSlice &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
+
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ OP_REQUIRES(operands.at(input_index).shape().rank() <= 4);
+}
+
+void ShapeValidator::visit(const ir::operation::Split &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(0)};
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ const auto input_index{node.getInputs().at(ir::operation::Split::Input::INPUT)};
+ const auto axis_index{node.getInputs().at(ir::operation::Split::Input::AXIS)};
+
+ const auto num_splits = node.param().num_splits;
+ const auto input_rank = operands.at(input_index).shape().rank();
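+ // AXIS is read from a constant operand; a negative value counts from the last dimension.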
+ auto axis = *reinterpret_cast<const int32_t *>(operands.at(axis_index).data()->base());
+ axis = axis < 0 ? axis + input_rank : axis;
+
+ OP_REQUIRES(axis >= 0 && axis < input_rank);
+ OP_REQUIRES(operands.at(input_index).shape().dim(axis) % num_splits == 0);
+}
+
+void ShapeValidator::visit(const ir::operation::Shape &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(0)};
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ const auto input_index{node.getInputs().at(0)};
+ UNUSED_RELEASE(input_index);
+ OP_REQUIRES(operands.at(output_index).shape().rank() == 1);
+}
+
+void ShapeValidator::visit(const ir::operation::ResizeBilinear &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
+
+ if (operands.at(output_index).info().isDynamic())
+ {
+ return;
+ }
+ OP_REQUIRES(operands.at(input_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(output_index).shape().rank() == 4);
+}
+
+void ShapeValidator::visit(const ir::operation::Reverse &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::Reverse::Input::INPUT)};
+
+ if (operands.at(output_index).info().isDynamic())
+ return;
+ OP_REQUIRES(operands.at(output_index).shape() == operands.at(input_index).shape());
+}
+
+void ShapeValidator::visit(const ir::operation::If &)
+{
+ // TODO Add to validate with subgraphs
+}
+
+void ShapeValidator::visit(const ir::operation::While &)
+{
+ // This validator does not check shape. So checking isDynamic() is skipped.
+ // TODO Add to validate with subgraphs
+}
+
+void ShapeValidator::visit(const ir::operation::SquaredDifference &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
+
+ // Check for dimension constraints
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ auto output_shape = operands.at(output_index).shape();
+ auto lhs_shape = operands.at(lhs_index).shape();
+ auto rhs_shape = operands.at(rhs_index).shape();
+ // Check for output rank
+ OP_REQUIRES(output_shape.rank() == std::max(lhs_shape.rank(), rhs_shape.rank()));
+ auto min_rank = std::min(lhs_shape.rank(), rhs_shape.rank());
+
+ for (int idx = 1; idx <= min_rank; idx++)
+ {
+ int l_idx = lhs_shape.rank() - idx;
+ int r_idx = rhs_shape.rank() - idx;
+ int out_idx = output_shape.rank() - idx;
+
+ OP_REQUIRES((l_idx >= 0) && (r_idx >= 0) && (out_idx >= 0));
+
+ auto l_dims = lhs_shape.dim(l_idx);
+ auto r_dims = rhs_shape.dim(r_idx);
+ auto out_dims = output_shape.dim(out_idx);
+
+ OP_REQUIRES(((l_dims == r_dims) && (out_dims == l_dims)) ||
+ ((l_dims == 1) && (out_dims == r_dims)) || ((r_dims == 1) && (out_dims == l_dims)));
+ }
+ auto &tmp_shape = (lhs_shape.rank() > rhs_shape.rank()) ? lhs_shape : rhs_shape;
+ for (int idx = min_rank + 1; idx <= output_shape.rank(); idx++)
+ {
+ int out_idx = output_shape.rank() - idx;
+ int tmp_idx = tmp_shape.rank() - idx;
+
+ OP_REQUIRES((out_idx >= 0) && (tmp_idx >= 0) &&
+ (output_shape.dim(out_idx) == tmp_shape.dim(tmp_idx)));
+ }
+}
+void ShapeValidator::visit(const ir::operation::Tile &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(0)};
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ const auto input_index{node.getInputs().at(0)};
+ const auto multiple_index{node.getInputs().at(1)};
+
+ OP_REQUIRES(operands.at(multiple_index).shape().rank() == 1);
+ OP_REQUIRES(operands.at(multiple_index).shape().dim(0) ==
+ operands.at(input_index).shape().rank());
+ OP_REQUIRES(operands.at(input_index).shape().rank() == operands.at(output_index).shape().rank());
+}
+
+void ShapeValidator::visit(const ir::operation::Range &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(0)};
+ const auto start_index{node.getInputs().at(ir::operation::Range::Input::START)};
+ const auto limit_index{node.getInputs().at(ir::operation::Range::Input::LIMIT)};
+ const auto delta_index{node.getInputs().at(ir::operation::Range::Input::DELTA)};
+
+ // Check for dimension constraints
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ OP_REQUIRES(operands.at(start_index).shape().rank() == 0);
+ OP_REQUIRES(operands.at(limit_index).shape().rank() == 0);
+ OP_REQUIRES(operands.at(delta_index).shape().rank() == 0);
+}
+
+void ShapeValidator::visit(const ir::operation::MatrixBandPart &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::MatrixBandPart::Input::INPUT)};
+ const auto num_lower_index{
+ node.getInputs().at(ir::operation::MatrixBandPart::Input::NUM_LOWER_DIAG)};
+ const auto num_upper_index{
+ node.getInputs().at(ir::operation::MatrixBandPart::Input::NUM_UPPER_DIAG)};
+
+ // Check for dimension constraints
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ OP_REQUIRES(operands.at(input_index).shape().rank() >= 2); // input must be a matrix of rank 2 or higher
+ OP_REQUIRES(operands.at(num_upper_index).shape().rank() == 0); // num_upper must be scalar
+ OP_REQUIRES(operands.at(num_lower_index).shape().rank() == 0); // num_lower must be scalar
+}
+
+void ShapeValidator::visit(const ir::operation::LogSoftmax &node)
+{
+ const auto &operands = _graph.operands();
+ const auto output_index{node.getOutputs().at(0)};
+ if (operands.at(output_index).info().isDynamic())
+ return;
+
+ const auto input_index{node.getInputs().at(0)};
+
+ OP_REQUIRES(operands.at(output_index).shape().rank() == operands.at(input_index).shape().rank());
+}
+
+} // namespace compiler
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/OperationValidator.h b/runtime/onert/core/src/compiler/ShapeValidator.h
index deb6357bb..a51e8adc0 100644
--- a/runtime/onert/core/src/compiler/OperationValidator.h
+++ b/runtime/onert/core/src/compiler/ShapeValidator.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_COMPILER_OPERATION_VALIDATOR_H__
-#define __ONERT_COMPILER_OPERATION_VALIDATOR_H__
+#ifndef __ONERT_COMPILER_SHAPE_VALIDATOR_H__
+#define __ONERT_COMPILER_SHAPE_VALIDATOR_H__
#include "ir/Layout.h"
#include "ir/OperationVisitor.h"
@@ -34,18 +34,25 @@ namespace onert
namespace compiler
{
-class OperationValidator : public ir::OperationVisitor
+class ShapeValidator : public ir::OperationVisitor
{
public:
- OperationValidator(void) = delete;
- OperationValidator(const ir::Graph &graph);
+ ShapeValidator(void) = delete;
+ ShapeValidator(const ir::Graph &graph);
+ ShapeValidator(const ShapeValidator &) = delete;
+ ShapeValidator(ShapeValidator &&) = delete;
+ ~ShapeValidator() = default;
public:
+ ShapeValidator &operator=(const ShapeValidator &) = delete;
+ ShapeValidator &operator=(ShapeValidator &&) = delete;
void operator()();
public:
void visit(const ir::operation::BatchMatMul &node) override;
void visit(const ir::operation::BatchToSpaceND &node) override;
+ void visit(const ir::operation::BCQFullyConnected &node) override;
+ void visit(const ir::operation::BCQGather &node) override;
void visit(const ir::operation::Comparison &node) override;
void visit(const ir::operation::Softmax &node) override;
void visit(const ir::operation::InstanceNorm &node) override;
@@ -88,13 +95,10 @@ private:
void checkUnaryOp(const ir::Operation &node);
private:
- // TODO Remove _ctx field
const ir::Graph &_graph;
- const ir::Operands &_ctx;
- ir::Layout _current_op_seq_layout;
};
} // namespace compiler
} // namespace onert
-#endif // __ONERT_COMPILER_OPERATION_VALIDATOR_H__
+#endif // __ONERT_COMPILER_SHAPE_VALIDATOR_H__
diff --git a/runtime/onert/core/src/compiler/StaticShapeInference.cc b/runtime/onert/core/src/compiler/StaticShapeInference.cc
deleted file mode 100644
index 4eba1ff49..000000000
--- a/runtime/onert/core/src/compiler/StaticShapeInference.cc
+++ /dev/null
@@ -1,1096 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "compiler/StaticShapeInference.h"
-#include "util/ShapeInference.h"
-#include "util/logging.h"
-
-#include <sstream>
-
-namespace onert
-{
-namespace compiler
-{
-
-bool StaticShapeInferer::infer(const ir::OpSequence &op_seq)
-{
- bool has_dynamic_tensor = false;
-
- for (const auto &operation_idx : op_seq.operations())
- {
- auto &op = _operations.at(operation_idx);
- auto opcode = op.opcode();
-
- _return_has_dynamic_tensor = false; // this is used as a return value inside operation's visit()
-
- // IF: need shape inference for then, else
- // While: need shape inference for condition, body
- if (opcode == ir::OpCode::If || opcode == ir::OpCode::While)
- {
- op.accept(*this);
- }
- else
- {
- _return_has_dynamic_tensor = checkDynamicInput(op);
-
- if (_return_has_dynamic_tensor)
- {
- setDynamicOutput(op);
- }
- else
- {
- op.accept(*this);
- }
- }
-
- has_dynamic_tensor = has_dynamic_tensor || _return_has_dynamic_tensor;
- }
-
- return has_dynamic_tensor;
-}
-
-bool StaticShapeInferer::checkDynamicInput(const ir::Operation &op)
-{
- for (auto input_idx : op.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED)
- {
- if (_operands.at(input_idx).info().isDynamic())
- {
- return true;
- }
- }
-
- return false;
-}
-
-void StaticShapeInferer::setDynamicOutput(const ir::Operation &op)
-{
- for (auto output_idx : op.getOutputs())
- {
- _operands.at(output_idx).info().setDynamic();
- }
-}
-
-void StaticShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op,
- const ir::OperandIndex lhs_idx,
- const ir::OperandIndex rhs_idx)
-{
- const auto &lhs = _operands.at(lhs_idx);
- const auto &rhs = _operands.at(rhs_idx);
-
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- // re-sizing output shape
- ir::Shape new_shape = shape_inference::inferEltwiseShape(lhs.info().shape(), rhs.info().shape());
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::handleSimpleUnaryOp(const ir::Operation &op,
- const ir::OperandIndex input_idx)
-{
- const auto &input = _operands.at(input_idx);
-
- // get mutable output operand
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- // re-sizing output shape
- ir::Shape new_shape = input.info().shape();
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::dump()
-{
- auto get_shape_str = [](const ir::Shape &shape) {
- std::stringstream sstream;
- sstream << "shape : {";
- for (int i = 0; i < shape.rank(); i++)
- {
- if (i == 0)
- sstream << shape.dim(i);
- else
- sstream << " " << shape.dim(i);
- }
- sstream << "}";
- return sstream.str();
- };
-
- for (const auto &pair : _lowered_subgs)
- {
- const auto index = pair.first;
- const auto &lowered_subg = pair.second;
- VERBOSE(StaticShapeInferer) << "SubGraph #" << index.value() << std::endl;
- lowered_subg->graph().operands().iterate(
- [&](const ir::OperandIndex &ind, const ir::Operand &operand) {
- VERBOSE(StaticShapeInferer) << "Operand #" << ind.value() << ", "
- << (operand.info().isDynamic() ? "Dynamic" : "Static") << ", "
- << get_shape_str(operand.info().shape()) << std::endl;
- });
- }
-}
-
-void StaticShapeInferer::visit(const ir::operation::ArgMax &op)
-{
- const auto input_idx{op.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
-
- // get mutable output operand
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
- const auto rank = input.info().shape().rank();
- const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
-
- assert(0 <= axis && axis < rank);
-
- // re-sizing output shape
- ir::Shape new_shape = shape_inference::inferArgMaxShape(input.info().shape(), axis, rank);
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::BatchMatMul &op)
-{
- const auto lhs_index = op.getInputs().at(ir::operation::BatchMatMul::Input::LHS);
- const auto rhs_index = op.getInputs().at(ir::operation::BatchMatMul::Input::RHS);
- const auto output_index = op.getOutputs().at(0);
- const auto lhs = _operands.at(lhs_index);
- const auto rhs = _operands.at(rhs_index);
- auto &output = _operands.at(output_index);
- auto new_shape = shape_inference::inferBatchMatMulShape(lhs.shape(), rhs.shape(), op.param());
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::BinaryArithmetic &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS),
- op.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS));
-}
-
-void StaticShapeInferer::visit(const ir::operation::BroadcastTo &op)
-{
- // get mutable output operand
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- const auto shape_idx{op.getInputs().at(ir::operation::BroadcastTo::Input::SHAPE)};
- const auto &shape = _operands.at(shape_idx);
-
- if (!shape.isConstant())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
- // assert(shape.typeInfo().type() == ir::DataType::INT32);
- auto shape_buffer = reinterpret_cast<const int32_t *>(shape.data()->base());
-
- // re-sizing output shape
- ir::Shape new_shape = shape_inference::inferBroadcastToShape(shape.info().shape(), shape_buffer);
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::Comparison &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Comparison::Input::INPUT0),
- op.getInputs().at(ir::operation::Comparison::Input::INPUT1));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Concat &op)
-{
- const auto input_count = op.getInputs().size();
-
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- shape_inference::Shapes input_shapes;
- for (uint32_t i = 0; i < input_count; i++)
- {
- const auto input_idx{op.getInputs().at(i)};
- const auto &input = _operands.at(input_idx);
- input_shapes.emplace_back(input.shape());
- }
-
- ir::Shape out_shape = shape_inference::inferConcatShape(input_shapes, op.param());
-
- // re-sizing output shape
- output.info().shape(out_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::Conv2D &op)
-{
- const auto input_idx{op.getInputs().at(ir::operation::Conv2D::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
- const auto ker_idx{op.getInputs().at(ir::operation::Conv2D::Input::KERNEL)};
- const auto &ker = _operands.at(ker_idx);
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- // re-sizing output shape
- ir::Shape new_shape =
- shape_inference::inferConv2DShape(input.info().shape(), ker.info().shape(), op.param());
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::ElementwiseActivation &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::ElementwiseBinary &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS),
- op.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS));
-}
-
-void StaticShapeInferer::visit(const ir::operation::ElementwiseUnary &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::ExpandDims &op)
-{
- const auto input_idx{op.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
- const auto axis_idx{op.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
- const auto &axis = _operands.at(axis_idx);
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- if (!axis.isConstant())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
- // even when axis is constant, output shape should be recalculated since user might call
- // nnfw_set_input_tensorinfo(input, some_new_shape)
- auto axis_buf = reinterpret_cast<const int32_t *>(axis.data()->base());
- assert(axis_buf);
-
- // re-sizing output shape
- ir::Shape new_shape = shape_inference::inferExpandDimsShape(input.info().shape(), axis_buf[0]);
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::Fill &op)
-{
- const auto input_idx{op.getInputs().at(ir::operation::Fill::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- if (!input.isConstant())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
- assert(input.typeInfo().type() == ir::DataType::INT32);
-
- auto input_buf = reinterpret_cast<const int32_t *>(input.data()->base());
- assert(input_buf);
-
- // re-sizing output shape
- ir::Shape new_shape = shape_inference::inferFillShape(input.info().shape(), input_buf);
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::FullyConnected &op)
-{
- const auto input_idx{op.getInputs().at(ir::operation::FullyConnected::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
-
- const auto ker_idx{op.getInputs().at(ir::operation::FullyConnected::Input::WEIGHT)};
- const auto &ker = _operands.at(ker_idx);
-
- // get mutable output operand
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
- // re-sizing output shape
- ir::Shape new_shape =
- shape_inference::inferFullyConnectedShape(input.info().shape(), ker.info().shape());
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::FusedBatchNorm &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::FusedBatchNorm::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Gather &op)
-{
- const auto input_idx{op.getInputs().at(ir::operation::Gather::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
-
- // get mutable output operand
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- const auto indices_idx{op.getInputs().at(ir::operation::Gather::Input::INDICES)};
- const auto &indices = _operands.at(indices_idx);
- const auto rank = input.info().shape().rank();
- const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
-
- assert(0 <= axis && axis < rank);
-
- // re-sizing output shape
- ir::Shape new_shape =
- shape_inference::inferGatherShape(input.info().shape(), indices.info().shape(), axis, rank);
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::If &op)
-{
- auto &then_graph = _lowered_subgs.at(op.param().then_subg_index)->graph();
- auto &else_graph = _lowered_subgs.at(op.param().else_subg_index)->graph();
- const std::vector<ir::OperandIndex> inputs{op.getInputs().begin() + 1, op.getInputs().end()};
- const auto &outputs = op.getOutputs();
-
- // re-sizing input shapes of then subgraph
- const auto &then_inputs = then_graph.getInputs();
- assert(inputs.size() == then_inputs.size());
- for (size_t i = 0; i < inputs.size(); ++i)
- {
- auto &then_input = then_graph.operands().at(then_inputs.at(i));
- if (_operands.at(inputs.at(i)).info().isDynamic())
- {
- then_input.info().setDynamic();
- }
- else
- {
- auto new_shape = _operands.at(inputs.at(i)).info().shape();
- then_input.info().shape(new_shape);
- }
- }
-
- // re-sizing input shapes of else subgraph
- const auto &else_inputs = else_graph.getInputs();
- assert(inputs.size() == else_inputs.size());
- for (size_t i = 0; i < inputs.size(); ++i)
- {
- auto &else_input = else_graph.operands().at(else_inputs.at(i));
- if (_operands.at(inputs.at(i)).info().isDynamic())
- {
- else_input.info().setDynamic();
- }
- else
- {
- const auto &new_shape = _operands.at(inputs.at(i)).info().shape();
- else_input.info().shape(new_shape);
- }
- }
-
- // re-sizing operands of then subgraph
- StaticShapeInferer then_inferer(op.param().then_subg_index, _lowered_subgs);
- _lowered_subgs.at(op.param().then_subg_index)
- ->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- bool has_dynamic_tensor = then_inferer.infer(op_seq);
- op_seq.has_dynamic_tensor(has_dynamic_tensor);
- });
-
- // re-sizing operands of else subgraph
- StaticShapeInferer else_inferer(op.param().else_subg_index, _lowered_subgs);
- _lowered_subgs.at(op.param().else_subg_index)
- ->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- bool has_dynamic_tensor = else_inferer.infer(op_seq);
- op_seq.has_dynamic_tensor(has_dynamic_tensor);
- });
-
- // re-sizing output shapes
- const auto &then_outputs = _lowered_subgs.at(op.param().then_subg_index)->graph().getOutputs();
- const auto &else_outputs = _lowered_subgs.at(op.param().else_subg_index)->graph().getOutputs();
- assert(outputs.size() == then_outputs.size());
- assert(outputs.size() == else_outputs.size());
- for (size_t i = 0; i < outputs.size(); ++i)
- {
- const auto &then_output = then_graph.operands().at(then_outputs.at(i));
- const auto &else_output = else_graph.operands().at(else_outputs.at(i));
- auto &output = _operands.at(outputs.at(i));
- if (!then_output.info().isDynamic() && !else_output.info().isDynamic() &&
- then_output.shape() == else_output.shape())
- {
- output.info().shape(then_output.shape());
- }
- else
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- }
- }
-}
-
-void StaticShapeInferer::visit(const ir::operation::L2Normalization &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::L2Normalization::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::MatrixBandPart &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::MatrixBandPart::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::OneHot &op)
-{
- const auto indice_idx{op.getInputs().at(ir::operation::OneHot::Input::INDICES)};
- const auto &indice = _operands.at(indice_idx);
- const auto depth_idx{op.getInputs().at(ir::operation::OneHot::Input::DEPTH)};
- const auto &depth = _operands.at(depth_idx);
-
- const auto axis = op.param().axis;
-
- auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- if (!depth.isConstant())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
- const auto *depth_buf = reinterpret_cast<const int32_t *>(depth.data()->base());
- assert(depth_buf);
- // re-sizing output shape
- ir::Shape new_shape = shape_inference::inferOnehotShape(indice.info().shape(), *depth_buf, axis);
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::Pack &op)
-{
- const auto input_idx{op.getInputs().at(0)};
- const auto &input = _operands.at(input_idx);
-
- // get mutable output operand
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- const auto rank = input.shape().rank() + 1;
- const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
- const auto num = op.param().num;
-
- assert(0 <= axis && axis < rank);
-
- // re-sizing output shape
- ir::Shape new_shape = shape_inference::inferPackShape(input.info().shape(), axis, rank, num);
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::Pad &op)
-{
- const auto input_idx{op.getInputs().at(ir::operation::Pad::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
-
- const auto pad_idx{op.getInputs().at(ir::operation::Pad::Input::PAD)};
- const auto &pad = _operands.at(pad_idx);
-
- // get mutable output operand
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- // if pad is not constant, output also becomes dynamic
- if (!pad.isConstant())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
- // re-sizing output shape
- const auto new_shape = shape_inference::inferPadShape(
- input.shape(), reinterpret_cast<const int32_t *>(pad.data()->base()),
- pad.shape().num_elements());
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::Permute &op)
-{
- const auto input_idx{op.getInputs().at(0)};
- const auto &input = _operands.at(input_idx);
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- // re-sizing output shape
- // Permute is a special operation that layouts of input/output may be different on backend
- // However, it is not applied here, so input/output have the same layout of frontend. Because
- // "ExecutorFactory" would convert shape of input/output accoding to the layouts when registering
- // operand info to "TensorBuilder" after calling "StaticShapeInferer"
- const auto new_shape = input.info().shape();
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::Pow &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Pow::Input::LHS),
- op.getInputs().at(ir::operation::Pow::Input::RHS));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Range &op)
-{
- const auto start_idx{op.getInputs().at(ir::operation::Range::Input::START)};
- const auto limit_idx{op.getInputs().at(ir::operation::Range::Input::LIMIT)};
- const auto delta_idx{op.getInputs().at(ir::operation::Range::Input::DELTA)};
- const auto &start_op = _operands.at(start_idx);
- const auto &limit_op = _operands.at(limit_idx);
- const auto &delta_op = _operands.at(delta_idx);
-
- // get mutable output operand
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- ir::Shape new_shape;
- if (start_op.isConstant() && limit_op.isConstant() && delta_op.isConstant())
- {
- assert(start_op.typeInfo().type() == limit_op.typeInfo().type() &&
- start_op.typeInfo().type() == delta_op.typeInfo().type());
- if (output.typeInfo().type() == ir::DataType::FLOAT32)
- {
- new_shape = shape_inference::inferRangeShape<float>(
- start_op.asScalar<float>(), limit_op.asScalar<float>(), delta_op.asScalar<float>());
- }
- else if (output.typeInfo().type() == ir::DataType::INT32)
- {
- new_shape = shape_inference::inferRangeShape<int32_t>(
- start_op.asScalar<int32_t>(), limit_op.asScalar<int32_t>(), delta_op.asScalar<int32_t>());
- }
- assert(output.shape() == new_shape);
- }
- else
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- }
-}
-
-void StaticShapeInferer::visit(const ir::operation::Reduce &op)
-{
- const auto input_idx{op.getInputs().at(ir::operation::Reduce::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
-
- const auto axes_idx{op.getInputs().at(ir::operation::Reduce::Input::AXES)};
- const auto &axes = _operands.at(axes_idx);
-
- // get mutable output operand
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- std::vector<int32_t> axes_vec;
- for (size_t i = 0; i < axes.shape().num_elements(); ++i)
- {
- switch (axes.typeInfo().type())
- {
- case ir::DataType::INT32:
- {
- axes_vec.emplace_back(reinterpret_cast<const int32_t *>(axes.data()->base())[i]);
- break;
- }
- case ir::DataType::INT64:
- {
- axes_vec.emplace_back(reinterpret_cast<const int64_t *>(axes.data()->base())[i]);
- break;
- }
- default:
- throw std::runtime_error("StaticShapeInferer " + op.name() + ": Not supported data type");
- break;
- }
- }
- const auto keep_dims = op.param().keep_dims;
-
- // re-sizing output shape
- ir::Shape new_shape =
- shape_inference::inferReduceShape(input.info().shape(), axes_vec, keep_dims);
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::Reshape &op)
-{
- const auto input_idx{op.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
-
- // get mutable output operand
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- // New shape is given by second input tensor
- if (op.getInputs().size() == 2)
- {
- // Let's check the second input
- const auto shape_idx{op.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
- const auto &shape = _operands.at(shape_idx);
-
- if (shape.isConstant())
- {
- const auto *shape_buf = reinterpret_cast<const int32_t *>(shape.data()->base());
- assert(shape_buf);
-
- ir::Shape new_shape = shape_inference::inferReshapeShape(
- shape_buf, shape.shape().num_elements(), input.shape().num_elements());
-
- // if shape is from Const, TFLC put the shape of output into tensor
- if (new_shape != output.shape())
- {
- // change on output shape
- output.info().shape(new_shape);
- }
- }
- else
- {
- // if shape is NOT Const, set output shape to be dynamic_
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- }
- }
- // New shape is given by option
- else if (op.param().new_shape.size() != 0)
- {
- // Let's check the new_shape option
- auto shape = op.param().new_shape;
- ir::Shape new_shape = shape_inference::inferReshapeShape(shape.data(), shape.size(),
- input.shape().num_elements());
-
- if (new_shape != output.shape())
- {
- // change on output shape
- output.info().shape(new_shape);
- }
- }
- else
- {
- throw std::runtime_error("Reshape: new shape is missing");
- }
-}
-
-void StaticShapeInferer::visit(const ir::operation::ResizeBilinear &op)
-{
- const auto input_idx{op.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
-
- // get mutable output operand
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- // Shape inferencing logic based on Params
- ir::Shape new_shape = shape_inference::inferResizeBilinearShape(
- input.shape(), op.param().height_out, op.param().width_out);
-
- // if size_op is from Const, TFLC put the shape of output into tensor
- if (new_shape != output.shape())
- {
- // change on output shape
- output.info().shape(new_shape);
- }
-}
-
-void StaticShapeInferer::visit(const ir::operation::Reverse &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Reverse::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Select &op)
-{
- const auto input_cond_idx{op.getInputs().at(ir::operation::Select::Input::CONDITION)};
- const auto &input_cond = _operands.at(input_cond_idx);
-
- const auto input_true_idx{op.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
- const auto &input_true = _operands.at(input_true_idx);
-
- const auto input_false_idx{op.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};
- const auto &input_false = _operands.at(input_false_idx);
-
- auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- // Select output shape
- ir::Shape new_shape = shape_inference::inferSelectShape(
- input_cond.info().shape(), input_true.info().shape(), input_false.info().shape());
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::Shape &op)
-{
- const auto input_idx{op.getInputs().at(0)};
- const auto &input = _operands.at(input_idx);
-
- // get mutable output operand
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- // re-sizing output shape
- ir::Shape output_shape;
- output_shape.append(input.info().shape().rank());
-
- output.info().shape(output_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::Slice &op)
-{
- const auto input_index{op.getInputs().at(ir::operation::Slice::Input::INPUT)};
- const auto &input = _operands.at(input_index);
- const auto begins_index{op.getInputs().at(ir::operation::Slice::Input::BEGINS)};
- const auto &begins = _operands.at(begins_index);
- const auto sizes_index{op.getInputs().at(ir::operation::Slice::Input::SIZES)};
- const auto &sizes = _operands.at(sizes_index);
- const auto output_index = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_index);
-
- // Whether input is constant or not does not affect whether output is dynamic or not
- if (!(begins.isConstant() && sizes.isConstant()))
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
- auto begins_buf = reinterpret_cast<const int32_t *>(begins.data()->base());
- auto sizes_buf = reinterpret_cast<const int32_t *>(sizes.data()->base());
-
- ir::Shape new_shape =
- shape_inference::inferSliceShape(input.info().shape(), begins_buf, sizes_buf);
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::Softmax &op)
-{
- handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Softmax::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::SpaceToBatchND &op)
-{
- const auto output_index = op.getOutputs().at(0);
- const auto input_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
- const auto block_shape_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
- const auto padding_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
-
- ir::Operand &output = _operands.at(output_index);
- const auto &input = _operands.at(input_idx);
- const auto &block_shape = _operands.at(block_shape_idx);
- const auto &padding = _operands.at(padding_idx);
-
- // Whether input is constant or not does not affect whether output is dynamic or not
- if (!(block_shape.isConstant() && padding.isConstant()))
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
- auto input_shape = input.info().shape();
- auto block_shape_shape = block_shape.info().shape();
- auto padding_shape = padding.info().shape();
-
- auto block_shape_data = reinterpret_cast<const int32_t *>(block_shape.data()->base());
- auto padding_data = reinterpret_cast<const int32_t *>(padding.data()->base());
-
- ir::Shape new_shape = shape_inference::inferSpaceToBatchNDShape(
- input_shape, block_shape_shape, padding_shape, block_shape_data, padding_data);
-
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::Split &op)
-{
- const auto input_idx{op.getInputs().at(0)};
- const auto &input = _operands.at(input_idx);
-
- const auto axis = op.param().axis;
- const auto num_splits = op.param().num_splits;
-
- const auto rank = input.info().shape().rank();
- auto axis_resolved = axis < 0 ? axis + rank : axis;
-
- assert(0 <= axis_resolved && axis_resolved < rank);
-
- ir::Shape new_shape =
- shape_inference::inferSplitShape(input.info().shape(), axis_resolved, num_splits);
- auto output_tensors = op.getOutputs();
- for (auto output_idx : output_tensors)
- {
- ir::Operand &output = _operands.at(output_idx);
- output.info().shape(new_shape);
- }
-}
-
-void StaticShapeInferer::visit(const ir::operation::SquaredDifference &op)
-{
- handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::SquaredDifference::Input::LHS),
- op.getInputs().at(ir::operation::SquaredDifference::Input::RHS));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Squeeze &op)
-{
- const auto input_idx{op.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
-
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- if (input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
- // Squeeze output shape
- ir::Shape new_shape = shape_inference::inferSqueezeShape(input.info().shape(), op.param());
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::StridedSlice &op)
-{
- const auto input_index{op.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
- const auto &input = _operands.at(input_index);
- const auto starts_index{op.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
- const auto &starts = _operands.at(starts_index);
- const auto ends_index{op.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
- const auto &ends = _operands.at(ends_index);
- const auto strides_index{op.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
- const auto &strides = _operands.at(strides_index);
- const auto output_index = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_index);
-
- if (!(starts.isConstant() && ends.isConstant() && strides.isConstant()))
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
- const auto begin_mask = op.param().begin_mask;
- const auto end_mask = op.param().end_mask;
- const auto shrink_axis_mask = op.param().shrink_axis_mask;
- const auto rank = input.info().shape().rank();
-
- auto starts_buf = reinterpret_cast<const uint32_t *>(starts.data()->base());
- auto ends_buf = reinterpret_cast<const uint32_t *>(ends.data()->base());
- auto strides_buf = reinterpret_cast<const uint32_t *>(strides.data()->base());
-
- auto op_params = shape_inference::buildStridedSliceParams(
- starts_buf, ends_buf, strides_buf, begin_mask, end_mask, shrink_axis_mask, rank);
-
- ir::Shape new_shape =
- shape_inference::inferStridedSliceShape(input.info().shape(), op_params, rank);
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::Tile &op)
-{
- const auto input_idx{op.getInputs().at(ir::operation::Tile::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
-
- const auto multiplier_idx{op.getInputs().at(ir::operation::Tile::Input::MULTIPLES)};
- const auto &multiplier = _operands.at(multiplier_idx);
-
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
-
- if (!multiplier.isConstant())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- return;
- }
-
- auto multiplier_buffer = reinterpret_cast<const int32_t *>(multiplier.data()->base());
- assert(multiplier_buffer);
-
- // re-sizing output shape
- auto new_shape = shape_inference::inferTileShape(input.info().shape(), multiplier_buffer);
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::Transpose &op)
-{
- const auto input_idx{op.getInputs().at(ir::operation::Transpose::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
-
- // get mutable output operand
- const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
- const auto perm{op.param().perm};
- // const auto rank{op.param().rank};
-
- // set output shape, based on input and params
- ir::Shape new_shape = shape_inference::inferTransposeShape(input.info().shape(), perm);
- output.info().shape(new_shape);
-}
-
-void StaticShapeInferer::visit(const ir::operation::Unpack &op)
-{
- const auto input_idx{op.getInputs().at(0)};
- const auto &input = _operands.at(input_idx);
- const auto num = op.param().num;
- const auto rank = input.shape().rank();
- const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
-
- assert(axis < rank);
- if (axis < 0)
- {
- for (int out_tensor_idx = 0; out_tensor_idx < num; out_tensor_idx++)
- {
- const auto output_idx = op.getOutputs().at(out_tensor_idx);
- ir::Operand &output = _operands.at(output_idx);
- output.info().setDynamic();
- }
- _return_has_dynamic_tensor = true;
- return;
- }
-
- ir::Shape new_shape = shape_inference::inferUnpackShape(input.info().shape(), axis, rank);
-
- // re-sizing output shape
- for (int out_tensor_idx = 0; out_tensor_idx < num; out_tensor_idx++)
- {
- const auto output_idx = op.getOutputs().at(out_tensor_idx);
- ir::Operand &output = _operands.at(output_idx);
- output.info().shape(new_shape);
- }
-}
-
-void StaticShapeInferer::visit(const ir::operation::While &op)
-{
- auto &cond_graph = _lowered_subgs.at(op.param().cond_subg_index)->graph();
- auto &body_graph = _lowered_subgs.at(op.param().body_subg_index)->graph();
- const auto inputs = op.getInputs();
- const auto &outputs = op.getOutputs();
-
- // re-sizing input shapes of then subgraph
- const auto &cond_inputs = cond_graph.getInputs();
- assert(inputs.size() == cond_inputs.size());
- for (size_t i = 0; i < inputs.size(); ++i)
- {
- const auto &input = _operands.at(inputs.at(i));
- auto &cond_input = cond_graph.operands().at(cond_inputs.at(i));
- if (input.info().isDynamic())
- {
- cond_input.info().setDynamic();
- }
- else
- {
- auto new_shape = input.info().shape();
- cond_input.info().shape(new_shape);
- }
- }
-
- // re-sizing input shapes of body subgraph
- const auto &body_inputs = body_graph.getInputs();
- assert(cond_inputs.size() == body_inputs.size());
- for (size_t i = 0; i < cond_inputs.size(); ++i)
- {
- const auto &cond_input = cond_graph.operands().at(cond_inputs.at(i));
- auto &body_input = body_graph.operands().at(body_inputs.at(i));
- if (cond_input.info().isDynamic())
- {
- body_input.info().setDynamic();
- }
- else
- {
- const auto &new_shape = cond_input.info().shape();
- body_input.info().shape(new_shape);
- }
- }
-
- // re-sizing operands of body subgraph
- StaticShapeInferer body_inferer(op.param().body_subg_index, _lowered_subgs);
- _lowered_subgs.at(op.param().body_subg_index)
- ->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- bool has_dynamic_tensor = body_inferer.infer(op_seq);
- op_seq.has_dynamic_tensor(has_dynamic_tensor);
- });
-
- // Check whether while operation's shapes are predictable
- // If any of shape of body outputs and cond inputs are different, non-constant operands would be
- // set to dynamic
- bool check_unpredictable_dynamic = false;
- const auto &body_outputs = body_graph.getOutputs();
- assert(body_outputs.size() == cond_inputs.size());
- for (size_t i = 0; i < body_outputs.size(); ++i)
- {
- const auto &body_output = body_graph.operands().at(body_outputs.at(i));
- auto &cond_input = cond_graph.operands().at(cond_inputs.at(i));
- if ((cond_input.info().isDynamic() != body_output.info().isDynamic()) ||
- (cond_input.shape() != body_output.shape()))
- {
- check_unpredictable_dynamic = true;
- break;
- }
- }
-
- if (check_unpredictable_dynamic)
- {
- // Set inputs of body subgraph
- for (const auto &input_index : body_inputs)
- {
- auto &input = body_graph.operands().at(input_index);
- if (!input.isConstant())
- {
- input.info().setDynamic();
- }
- }
-
- // Set inputs of cond subgraph
- for (const auto &input_index : cond_inputs)
- {
- auto &input = cond_graph.operands().at(input_index);
- if (!input.isConstant())
- {
- input.info().setDynamic();
- }
- }
-
- // Set non-constant operands of body subgraph to dynamic
- StaticShapeInferer body_inferer(op.param().body_subg_index, _lowered_subgs);
- _lowered_subgs.at(op.param().body_subg_index)
- ->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- bool has_dynamic_tensor = body_inferer.infer(op_seq);
- op_seq.has_dynamic_tensor(has_dynamic_tensor);
- });
- }
-
- // re-sizing operands of cond subgraph
- // If check_unpredictable_dynamic is true, non-constant operands of cond subgraph would be set to
- // dynamic
- StaticShapeInferer cond_inferer(op.param().cond_subg_index, _lowered_subgs);
- _lowered_subgs.at(op.param().cond_subg_index)
- ->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- bool has_dynamic_tensor = cond_inferer.infer(op_seq);
- op_seq.has_dynamic_tensor(has_dynamic_tensor);
- });
-
- // re-sizing outputs of while operation
- // If check_unpredictable_dynamic is true, outputs of while operation would be set to dynamic
- assert(cond_inputs.size() == outputs.size());
- for (size_t i = 0; i < cond_inputs.size(); ++i)
- {
- const auto &cond_input = cond_graph.operands().at(cond_inputs.at(i));
- auto &output = _operands.at(outputs.at(i));
- if (cond_input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- }
- else
- {
- const auto new_shape = cond_input.info().shape();
- output.info().shape(new_shape);
- }
- }
-}
-
-} // namespace compiler
-
-} // namespace onert
diff --git a/runtime/onert/core/src/compiler/StaticShapeInferer.cc b/runtime/onert/core/src/compiler/StaticShapeInferer.cc
new file mode 100644
index 000000000..ec5d2146b
--- /dev/null
+++ b/runtime/onert/core/src/compiler/StaticShapeInferer.cc
@@ -0,0 +1,1425 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "compiler/StaticShapeInferer.h"
+#include "util/ShapeInference.h"
+#include "util/logging.h"
+
+#include <misc/polymorphic_downcast.h>
+
+#include <sstream>
+#include <stdexcept>
+
+namespace onert
+{
+namespace compiler
+{
+void OperandObserver::updateShapes(const std::vector<ir::OperandInfo> &changed_operands_info,
+ bool unpredictable)
+{
+ assert(changed_operands_info.size() == _operands.size());
+ for (size_t i = 0; i < changed_operands_info.size(); ++i)
+ {
+ const auto &changed_operand_info = changed_operands_info.at(i);
+ auto &operand = _operands.at(i);
+    // assert(changed_operand_info.typeInfo() == operand->typeInfo());
+    // This error check may be replaced by an assertion if this function is called after the
+    // validation of models is completed.
+ if (changed_operand_info.typeInfo() != operand->typeInfo())
+ {
+ throw std::runtime_error("OperandObserver: The types of operands are mismatched");
+ }
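+    // Constant operands keep their shape; non-constant operands become dynamic when the new
+    // info is dynamic or the caller marked this update as unpredictable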
+ if (!operand->info().isConstant() && (changed_operand_info.isDynamic() || unpredictable))
+ {
+ operand->info().setDynamic();
+ }
+ else
+ {
+ const auto &new_shape = changed_operands_info.at(i).shape();
+ operand->info().shape(new_shape);
+ }
+ }
+}
+
+void StaticShapeInferer::infer()
+{
+ for (const auto &op_idx : _lowered_subg->graph().topolSortOperations())
+ {
+ const auto &op = _lowered_subg->graph().operations().at(op_idx);
+ bool has_dynamic_tensor = false;
+ const auto opcode = op.opcode();
+ // IF: requires shape inference for then, else
+ // While: requires shape inference for condition, body
+ if (opcode == ir::OpCode::If || opcode == ir::OpCode::While)
+ {
+ op.accept(*this);
+ }
+ else
+ {
+ has_dynamic_tensor = checkDynamicInput(op);
+ if (has_dynamic_tensor)
+ {
+ setDynamicOutput(op);
+ }
+ else
+ {
+ op.accept(*this);
+ }
+ }
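+    // An operation is also flagged as dynamic when any of its outputs was set to dynamic
+    // during inference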
+ has_dynamic_tensor = has_dynamic_tensor || checkDynamicOutput(op);
+ _lowered_subg->setHasDynamicTensor(op_idx, has_dynamic_tensor);
+ }
+
+ if (_controlflow_output_observer != nullptr)
+ {
+    // re-sizing output shapes of the controlflow operation branching to this subgraph
+ std::vector<ir::OperandInfo> outputs_info;
+ const auto &graph = _lowered_subg->graph();
+ const auto &outputs = graph.getOutputs();
+ for (size_t i = 0; i < outputs.size(); ++i)
+ {
+ const auto &operand_info = graph.operands().at(outputs.at(i)).info();
+ outputs_info.emplace_back(operand_info);
+ }
+ _controlflow_output_observer->updateShapes(outputs_info);
+ }
+}
+
+bool StaticShapeInferer::checkDynamicInput(const ir::IOperation &op)
+{
+ const auto &operands = _lowered_subg->graph().operands();
+ for (auto &&input_idx : op.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED)
+ {
+ if (operands.at(input_idx).info().isDynamic())
+ {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool StaticShapeInferer::checkDynamicOutput(const ir::IOperation &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+ for (auto &&output_idx : op.getOutputs() | ir::Remove::UNDEFINED)
+ {
+ if (operands.at(output_idx).info().isDynamic())
+ {
+ return true;
+ }
+ }
+ return false;
+}
+
+void StaticShapeInferer::setDynamicOutput(const ir::IOperation &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+ for (auto &&output_idx : op.getOutputs() | ir::Remove::UNDEFINED)
+ {
+ operands.at(output_idx).info().setDynamic();
+ }
+}
+
+void StaticShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op,
+ const ir::OperandIndex lhs_idx,
+ const ir::OperandIndex rhs_idx)
+{
+ auto &operands = _lowered_subg->graph().operands();
+ const auto &lhs = operands.at(lhs_idx);
+ const auto &rhs = operands.at(rhs_idx);
+
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ // re-sizing output shape
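+  // (inferEltwiseShape broadcasts the lhs and rhs shapes into the common output shape)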
+ ir::Shape new_shape = shape_inference::inferEltwiseShape(lhs.info().shape(), rhs.info().shape());
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::handleSimpleUnaryOp(const ir::Operation &op,
+ const ir::OperandIndex input_idx)
+{
+ auto &operands = _lowered_subg->graph().operands();
+ const auto &input = operands.at(input_idx);
+
+ // get mutable output operand
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ // re-sizing output shape
+ ir::Shape new_shape = input.info().shape();
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::dump()
+{
+ auto get_shape_str = [](const ir::Shape &shape) {
+ std::stringstream sstream;
+ sstream << "shape : {";
+ for (int i = 0; i < shape.rank(); i++)
+ {
+ if (i == 0)
+ sstream << shape.dim(i);
+ else
+ sstream << " " << shape.dim(i);
+ }
+ sstream << "}";
+ return sstream.str();
+ };
+
+ _lowered_subg->graph().operands().iterate(
+ [&](const ir::OperandIndex &ind, const ir::Operand &operand) {
+ VERBOSE(StaticShapeInferer) << " " << ind << ", "
+ << (operand.info().isDynamic() ? "Dynamic" : "Static") << ", "
+ << get_shape_str(operand.info().shape()) << std::endl;
+ });
+}
+
+std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>>
+StaticShapeInferer::createStaticShapeInferers(
+ const std::unordered_map<ir::SubgraphIndex, ILoweredGraph *> &lowered_subgs)
+{
+ // Allocate StaticShapeInferer per each subgraph
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers;
+ for (auto &&pair : lowered_subgs)
+ {
+ const auto &subg_index = pair.first;
+ auto &lowered_subg = pair.second;
+ inferers[subg_index] = std::make_unique<StaticShapeInferer>(lowered_subg);
+ }
+
+ // Append observers in all StaticShapeInferers
+ for (auto &&pair : lowered_subgs)
+ {
+ const auto &subg_index = pair.first;
+ auto &lowered_subg = pair.second;
+
+    // TODO: Change this iteration over all operations to iterate only over controlflow operations
+ lowered_subg->graph().operations().iterate(
+ [&](const ir::OperationIndex &, const ir::IOperation &op) {
+        // A function to append child inferers. These make it possible for a StaticShapeInferer to
+        // call StaticShapeInferers of child subgraphs recursively
+ auto appendChildInferer = [&](const ir::SubgraphIndex &child_subg_idx) {
+ auto *child_inferer = inferers.at(child_subg_idx).get();
+ inferers.at(subg_index)->appendChildInferer(child_subg_idx, child_inferer);
+ };
+
+        // A function to append subgraph input observers. This makes it possible for a
+        // StaticShapeInferer to update inputs of child subgraphs
+ auto appendSubgraphInputObserver = [&](const ir::SubgraphIndex &child_subg_idx) {
+ std::vector<ir::Operand *> child_subg_inputs;
+ auto &child_subg = lowered_subgs.at(child_subg_idx)->graph();
+ for (const auto &input_idx : child_subg.getInputs())
+ {
+ auto operand_ptr = child_subg.operands().getRawPtr(input_idx);
+ child_subg_inputs.emplace_back(operand_ptr);
+ }
+ inferers.at(subg_index)
+ ->appendSubgInputObserver(child_subg_idx,
+ std::make_unique<OperandObserver>(child_subg_inputs));
+ };
+
+        // A function to set controlflow output observers. This makes it possible for a
+        // StaticShapeInferer to update outputs of parent controlflow operations
+ auto setControlFlowOutputObserver = [&](const ir::SubgraphIndex &child_subg_idx) {
+ std::vector<ir::Operand *> cf_outputs;
+ auto &subg = lowered_subg->graph();
+ for (const auto &output_idx : op.getOutputs())
+ {
+ auto operand_ptr = subg.operands().getRawPtr(output_idx);
+ cf_outputs.emplace_back(operand_ptr);
+ }
+ inferers.at(child_subg_idx)
+ ->setControlflowOutputObserver(std::make_unique<OperandObserver>(cf_outputs));
+ };
+
+ // Append Observers in a StaticShapeInferer
+ if (op.opcode() == ir::OpCode::If)
+ {
+          // TODO Remove dynamic_cast
+          // A virtual base class cannot be downcast by static_cast
+ const auto &if_op = dynamic_cast<const ir::operation::If &>(op);
+
+ appendChildInferer(if_op.param().then_subg_index);
+ appendChildInferer(if_op.param().else_subg_index);
+
+ appendSubgraphInputObserver(if_op.param().then_subg_index);
+ appendSubgraphInputObserver(if_op.param().else_subg_index);
+
+ setControlFlowOutputObserver(if_op.param().then_subg_index);
+ }
+ else if (op.opcode() == ir::OpCode::While)
+ {
+ // TODO Remove dynamic_cast
+ const auto &while_op = dynamic_cast<const ir::operation::While &>(op);
+
+ appendChildInferer(while_op.param().cond_subg_index);
+ appendChildInferer(while_op.param().body_subg_index);
+
+ appendSubgraphInputObserver(while_op.param().cond_subg_index);
+ appendSubgraphInputObserver(while_op.param().body_subg_index);
+
+ setControlFlowOutputObserver(while_op.param().body_subg_index);
+ }
+ });
+ }
+
+ return inferers;
+}
+
+void StaticShapeInferer::visit(const ir::operation::ArgMinMax &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)};
+ const auto &input = operands.at(input_idx);
+
+ const auto axis_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)};
+ const auto &axis = operands.at(axis_idx);
+
+ // get mutable output operand
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ if (!axis.isConstant())
+ {
+ output.info().setDynamic();
+ return;
+ }
+
+ const auto rank = input.info().shape().rank();
+ auto axis_value = axis.asScalar<int32_t>();
+ axis_value = axis_value < 0 ? axis_value + rank : axis_value;
+
+ // re-sizing output shape
+ ir::Shape new_shape =
+ shape_inference::inferArgMinMaxShape(input.info().shape(), axis_value, rank);
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::BatchMatMul &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto lhs_index = op.getInputs().at(ir::operation::BatchMatMul::Input::LHS);
+ const auto rhs_index = op.getInputs().at(ir::operation::BatchMatMul::Input::RHS);
+ const auto output_index = op.getOutputs().at(0);
+ const auto &lhs = operands.at(lhs_index);
+ const auto &rhs = operands.at(rhs_index);
+ auto &output = operands.at(output_index);
+ auto new_shape = shape_inference::inferBatchMatMulShape(lhs.shape(), rhs.shape(), op.param());
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::BCQFullyConnected &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(ir::operation::BCQFullyConnected::Input::INPUT)};
+ const auto &input = operands.at(input_idx);
+
+ const auto cluster_idx{
+ op.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)};
+ const auto &cluster = operands.at(cluster_idx);
+
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ auto cluster_buf = reinterpret_cast<const int32_t *>(cluster.data()->base());
+ assert(cluster_buf);
+
+ // re-sizing output shape
+ ir::Shape new_shape = shape_inference::inferBCQFullyConnectedShape(
+ input.info().shape(), cluster.info().shape(), cluster_buf);
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::BCQGather &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto indices_idx{op.getInputs().at(ir::operation::BCQGather::Input::INDICES)};
+ const auto &indices = operands.at(indices_idx);
+
+ const auto input_binary_idx{op.getInputs().at(ir::operation::BCQGather::Input::INPUT_BINARY)};
+ const auto &input_binary = operands.at(input_binary_idx);
+
+ const auto cluster_idx{op.getInputs().at(ir::operation::BCQGather::Input::INPUT_CLUSTERS)};
+ const auto &cluster = operands.at(cluster_idx);
+
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ auto cluster_buf = reinterpret_cast<const int32_t *>(cluster.data()->base());
+ assert(cluster_buf);
+
+ auto rank = input_binary.shape().rank();
+
+ // re-sizing output shape
+ ir::Shape new_shape = shape_inference::inferBCQGatherShape(
+ indices.info().shape(), cluster.info().shape(), cluster_buf, rank, op.param());
+
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::BinaryArithmetic &op)
+{
+ handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS),
+ op.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS));
+}
+
+void StaticShapeInferer::visit(const ir::operation::BroadcastTo &op)
+{
+ // get mutable output operand
+ auto &operands = _lowered_subg->graph().operands();
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ const auto shape_idx{op.getInputs().at(ir::operation::BroadcastTo::Input::SHAPE)};
+ const auto &shape = operands.at(shape_idx);
+
+ if (!shape.isConstant())
+ {
+ output.info().setDynamic();
+ return;
+ }
+
+ // assert(shape.typeInfo().type() == ir::DataType::INT32);
+ auto shape_buffer = reinterpret_cast<const int32_t *>(shape.data()->base());
+
+ // re-sizing output shape
+ ir::Shape new_shape = shape_inference::inferBroadcastToShape(shape.info().shape(), shape_buffer);
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::Comparison &op)
+{
+ handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Comparison::Input::INPUT0),
+ op.getInputs().at(ir::operation::Comparison::Input::INPUT1));
+}
+
+void StaticShapeInferer::visit(const ir::operation::Concat &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_count = op.getInputs().size();
+
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ shape_inference::Shapes input_shapes;
+ for (uint32_t i = 0; i < input_count; i++)
+ {
+ const auto input_idx{op.getInputs().at(i)};
+ const auto &input = operands.at(input_idx);
+ input_shapes.emplace_back(input.shape());
+ }
+
+ ir::Shape out_shape = shape_inference::inferConcatShape(input_shapes, op.param());
+
+ // re-sizing output shape
+ output.info().shape(out_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::Conv2D &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(ir::operation::Conv2D::Input::INPUT)};
+ const auto &input = operands.at(input_idx);
+ const auto ker_idx{op.getInputs().at(ir::operation::Conv2D::Input::KERNEL)};
+ const auto &ker = operands.at(ker_idx);
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ // re-sizing output shape
+ ir::Shape new_shape =
+ shape_inference::inferConv2DShape(input.info().shape(), ker.info().shape(), op.param());
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::ElementwiseActivation &op)
+{
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT));
+}
+
+void StaticShapeInferer::visit(const ir::operation::ElementwiseBinary &op)
+{
+ handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS),
+ op.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS));
+}
+
+void StaticShapeInferer::visit(const ir::operation::ElementwiseUnary &op)
+{
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT));
+}
+
+void StaticShapeInferer::visit(const ir::operation::ExpandDims &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
+ const auto &input = operands.at(input_idx);
+ const auto axis_idx{op.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
+ const auto &axis = operands.at(axis_idx);
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ if (!axis.isConstant())
+ {
+ output.info().setDynamic();
+ return;
+ }
+
+ // even when axis is constant, output shape should be recalculated since user might call
+ // nnfw_set_input_tensorinfo(input, some_new_shape)
+ auto axis_type = axis.typeInfo().type();
+ assert(axis_type == ir::DataType::INT32 || axis_type == ir::DataType::INT64);
+
+ assert(axis.data()->base());
+ int32_t axis_value =
+ (axis_type == ir::DataType::INT32)
+ ? reinterpret_cast<const int32_t *>(axis.data()->base())[0]
+ : static_cast<int32_t>(reinterpret_cast<const int64_t *>(axis.data()->base())[0]);
+
+ // re-sizing output shape
+ ir::Shape new_shape = shape_inference::inferExpandDimsShape(input.info().shape(), axis_value);
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::Fill &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto shape_idx{op.getInputs().at(ir::operation::Fill::Input::SHAPE)};
+ const auto &shape = operands.at(shape_idx);
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ if (!shape.isConstant())
+ {
+ output.info().setDynamic();
+ return;
+ }
+
+ const auto dims_type = shape.typeInfo().type();
+ assert(dims_type == ir::DataType::INT32 || dims_type == ir::DataType::INT64);
+
+ auto dims_buf = shape.data()->base();
+ assert(dims_buf);
+
+ const auto &dims_shape = shape.info().shape();
+ const auto &new_shape = ((dims_type == ir::DataType::INT32)
+ ? shape_inference::inferFillShape<int32_t>(
+ dims_shape, reinterpret_cast<const int32_t *>(dims_buf))
+ : shape_inference::inferFillShape<int64_t>(
+ dims_shape, reinterpret_cast<const int64_t *>(dims_buf)));
+
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::FullyConnected &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(ir::operation::FullyConnected::Input::INPUT)};
+ const auto &input = operands.at(input_idx);
+
+ const auto ker_idx{op.getInputs().at(ir::operation::FullyConnected::Input::WEIGHT)};
+ const auto &ker = operands.at(ker_idx);
+
+ // get mutable output operand
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+ // re-sizing output shape
+ ir::Shape new_shape =
+ shape_inference::inferFullyConnectedShape(input.info().shape(), ker.info().shape());
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::FusedBatchNorm &op)
+{
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::FusedBatchNorm::Input::INPUT));
+}
+
+void StaticShapeInferer::visit(const ir::operation::Gather &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(ir::operation::Gather::Input::INPUT)};
+ const auto &input = operands.at(input_idx);
+
+ // get mutable output operand
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ const auto indices_idx{op.getInputs().at(ir::operation::Gather::Input::INDICES)};
+ const auto &indices = operands.at(indices_idx);
+ const auto rank = input.info().shape().rank();
+ const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
+
+ assert(0 <= axis && axis < rank);
+
+ // re-sizing output shape
+ ir::Shape new_shape =
+ shape_inference::inferGatherShape(input.info().shape(), indices.info().shape(), axis, rank);
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::If &op)
+{
+ // re-sizing input shapes of then/else subgraph
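+  // Input 0 of If is the boolean condition, so only the remaining inputs are forwarded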
+ const std::vector<ir::OperandIndex> inputs{op.getInputs().begin() + 1, op.getInputs().end()};
+
+ std::vector<ir::OperandInfo> inputs_info;
+ const auto &graph = _lowered_subg->graph();
+ for (size_t i = 0; i < inputs.size(); ++i)
+ {
+ const auto &operand_info = graph.operands().at(inputs.at(i)).info();
+ inputs_info.emplace_back(operand_info);
+ }
+ _subg_input_observers.at(op.param().then_subg_index)->updateShapes(inputs_info);
+ _child_inferers.at(op.param().then_subg_index)->infer();
+
+ _subg_input_observers.at(op.param().else_subg_index)->updateShapes(inputs_info);
+ _child_inferers.at(op.param().else_subg_index)->infer();
+}
+
+void StaticShapeInferer::visit(const ir::operation::L2Normalization &op)
+{
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::L2Normalization::Input::INPUT));
+}
+
+void StaticShapeInferer::visit(const ir::operation::Loss &)
+{
+ // TODO Consider SparseCategoricalCrossentropy case
+
+ // TODO Consider output shape in case of reduction option
+}
+
+void StaticShapeInferer::visit(const ir::operation::LSTM &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto output_index{op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
+ auto &output = operands.at(output_index);
+
+ const auto output_state_out_index{
+ op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
+
+ const auto cell_state_out_index{op.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
+
+ const auto scratch_buffer_index{op.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
+
+ if (output.info().isDynamic() ||
+ (operands.exist(output_state_out_index) &&
+ operands.at(output_state_out_index).info().isDynamic()) ||
+ (operands.exist(cell_state_out_index) &&
+ operands.at(cell_state_out_index).info().isDynamic()) ||
+ (operands.exist(scratch_buffer_index) &&
+ operands.at(scratch_buffer_index).info().isDynamic()))
+ return;
+
+ const auto input_index{op.getInputs().at(ir::operation::LSTM::Input::INPUT)};
+ const auto &input = operands.at(input_index);
+
+ const auto input_to_output_weights_index{
+ op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
+ const auto &input_to_output_weights = operands.at(input_to_output_weights_index);
+
+ const auto recurrent_to_output_weights_index{
+ op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
+ const auto &recurrent_to_output_weights = operands.at(recurrent_to_output_weights_index);
+
+ // re-sizing outputs
+ const int n_batch = (input.shape().rank() == 3 && op.param().time_major) ? input.shape().dim(1)
+ : input.shape().dim(0);
+ const int n_cell = input_to_output_weights.shape().dim(0);
+ const int n_output = recurrent_to_output_weights.shape().dim(1);
+ if (input.shape().rank() == 3)
+ {
+ if (op.param().time_major)
+ output.info().shape(ir::Shape{input.shape().dim(0), n_batch, n_output});
+ else
+ output.info().shape(ir::Shape{n_batch, input.shape().dim(1), n_output});
+ }
+ else
+ {
+ assert(input.shape().rank() == 2);
+ output.info().shape(ir::Shape{n_batch, n_output});
+ }
+
+ if (operands.exist(output_state_out_index))
+ {
+ auto &output_state_out = operands.at(output_state_out_index);
+ output_state_out.info().shape(ir::Shape{n_batch, n_output});
+ }
+
+ if (operands.exist(cell_state_out_index))
+ {
+ auto &cell_state_out = operands.at(cell_state_out_index);
+ cell_state_out.info().shape(ir::Shape{n_batch, n_cell});
+ }
+
+ if (operands.exist(scratch_buffer_index))
+ {
+ auto &scratch_buffer = operands.at(scratch_buffer_index);
+
+ const auto input_to_input_weights_index{
+ op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)};
+ const auto recurrent_to_input_weights_index{
+ op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)};
+
+ bool has_input_to_input_weights =
+ operands.at(input_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(input_to_input_weights_index).shape().dim(1) != 0;
+ bool has_recurrent_to_input_weights =
+ operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
+
+    // NOTE The cell_to_input_weights do not exist in non-peephole mode, even for a regular
+    // (non-CIFG) LSTM.
+    // true: no CIFG
+    // false: CIFG
+ bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
+ if (has_cifg_param)
+ {
+ scratch_buffer.info().shape(ir::Shape{n_batch, n_cell * 4});
+ }
+ else
+ {
+ scratch_buffer.info().shape(ir::Shape{n_batch, n_cell * 3});
+ }
+ }
+}
+
+void StaticShapeInferer::visit(const ir::operation::MatrixBandPart &op)
+{
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::MatrixBandPart::Input::INPUT));
+}
+
+void StaticShapeInferer::visit(const ir::operation::OneHot &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto indice_idx{op.getInputs().at(ir::operation::OneHot::Input::INDICES)};
+ const auto &indice = operands.at(indice_idx);
+ const auto depth_idx{op.getInputs().at(ir::operation::OneHot::Input::DEPTH)};
+ const auto &depth = operands.at(depth_idx);
+
+ const auto axis = op.param().axis;
+
+ auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ if (!depth.isConstant())
+ {
+ output.info().setDynamic();
+ return;
+ }
+
+ const auto *depth_buf = reinterpret_cast<const int32_t *>(depth.data()->base());
+ assert(depth_buf);
+ // re-sizing output shape
+ ir::Shape new_shape = shape_inference::inferOnehotShape(indice.info().shape(), *depth_buf, axis);
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::Pack &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(0)};
+ const auto &input = operands.at(input_idx);
+
+ // get mutable output operand
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
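+  // Pack adds a new axis, so the output rank is the input rank plus one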
+ const auto rank = input.shape().rank() + 1;
+ const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
+ const auto num = op.param().num;
+
+ assert(0 <= axis && axis < rank);
+
+ // re-sizing output shape
+ ir::Shape new_shape = shape_inference::inferPackShape(input.info().shape(), axis, rank, num);
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::Pad &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(ir::operation::Pad::Input::INPUT)};
+ const auto &input = operands.at(input_idx);
+
+ const auto pad_idx{op.getInputs().at(ir::operation::Pad::Input::PAD)};
+ const auto &pad = operands.at(pad_idx);
+
+ // get mutable output operand
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ // if pad is not constant, output also becomes dynamic
+ if (!pad.isConstant())
+ {
+ output.info().setDynamic();
+ return;
+ }
+
+ // re-sizing output shape
+ const auto new_shape = shape_inference::inferPadShape(
+ input.shape(), reinterpret_cast<const int32_t *>(pad.data()->base()),
+ pad.shape().num_elements());
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::Permute &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(0)};
+ const auto &input = operands.at(input_idx);
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ // re-sizing output shape
+  // Permute is a special operation whose input/output layouts may differ per backend.
+  // That is not applied here, so input/output keep the layout of the frontend, because
+  // "ExecutorFactory" converts the input/output shapes according to the layouts when registering
+  // operand info to "TensorBuilder" after calling "StaticShapeInferer"
+ const auto &new_shape = input.info().shape();
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::Pow &op)
+{
+ handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Pow::Input::LHS),
+ op.getInputs().at(ir::operation::Pow::Input::RHS));
+}
+
+void StaticShapeInferer::visit(const ir::operation::Range &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto start_idx{op.getInputs().at(ir::operation::Range::Input::START)};
+ const auto limit_idx{op.getInputs().at(ir::operation::Range::Input::LIMIT)};
+ const auto delta_idx{op.getInputs().at(ir::operation::Range::Input::DELTA)};
+ const auto &start_op = operands.at(start_idx);
+ const auto &limit_op = operands.at(limit_idx);
+ const auto &delta_op = operands.at(delta_idx);
+
+ // get mutable output operand
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ ir::Shape new_shape;
+ if (start_op.isConstant() && limit_op.isConstant() && delta_op.isConstant())
+ {
+ assert(start_op.typeInfo().type() == limit_op.typeInfo().type() &&
+ start_op.typeInfo().type() == delta_op.typeInfo().type());
+ if (output.typeInfo().type() == ir::DataType::FLOAT32)
+ {
+ new_shape = shape_inference::inferRangeShape<float>(
+ start_op.asScalar<float>(), limit_op.asScalar<float>(), delta_op.asScalar<float>());
+ }
+ else if (output.typeInfo().type() == ir::DataType::INT32)
+ {
+ new_shape = shape_inference::inferRangeShape<int32_t>(
+ start_op.asScalar<int32_t>(), limit_op.asScalar<int32_t>(), delta_op.asScalar<int32_t>());
+ }
+ assert(output.shape() == new_shape);
+ }
+ else
+ {
+ output.info().setDynamic();
+ }
+}
+
+void StaticShapeInferer::visit(const ir::operation::Reduce &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(ir::operation::Reduce::Input::INPUT)};
+ const auto &input = operands.at(input_idx);
+
+ const auto axes_idx{op.getInputs().at(ir::operation::Reduce::Input::AXES)};
+ const auto &axes = operands.at(axes_idx);
+
+ // get mutable output operand
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ std::vector<int32_t> axes_vec;
+ for (size_t i = 0; i < axes.shape().num_elements(); ++i)
+ {
+ switch (axes.typeInfo().type())
+ {
+ case ir::DataType::INT32:
+ {
+ axes_vec.emplace_back(reinterpret_cast<const int32_t *>(axes.data()->base())[i]);
+ break;
+ }
+ case ir::DataType::INT64:
+ {
+ axes_vec.emplace_back(reinterpret_cast<const int64_t *>(axes.data()->base())[i]);
+ break;
+ }
+ default:
+ throw std::runtime_error("StaticShapeInferer " + op.name() + ": Not supported data type");
+ break;
+ }
+ }
+ const auto keep_dims = op.param().keep_dims;
+
+ // re-sizing output shape
+ ir::Shape new_shape =
+ shape_inference::inferReduceShape(input.info().shape(), axes_vec, keep_dims);
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::Reshape &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(ir::operation::Reshape::Input::INPUT)};
+ const auto &input = operands.at(input_idx);
+
+ // get mutable output operand
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ // New shape is given by second input tensor
+ if (op.getInputs().size() == 2)
+ {
+ // Let's check the second input
+ const auto shape_idx{op.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
+ const auto &shape = operands.at(shape_idx);
+
+ if (shape.isConstant())
+ {
+ const auto *shape_buf = reinterpret_cast<const int32_t *>(shape.data()->base());
+ assert(shape_buf);
+
+ ir::Shape new_shape = shape_inference::inferReshapeShape(
+ shape_buf, shape.shape().num_elements(), input.shape().num_elements());
+
+      // if shape comes from a constant, TFLC already puts the output shape into the tensor
+ if (new_shape != output.shape())
+ {
+ // change on output shape
+ output.info().shape(new_shape);
+ }
+ }
+ else
+ {
+      // if shape is NOT constant, set the output shape to be dynamic
+ output.info().setDynamic();
+ }
+ }
+ // New shape is given by option
+ else if (op.param().new_shape.size() != 0)
+ {
+ // Let's check the new_shape option
+ auto shape = op.param().new_shape;
+ ir::Shape new_shape =
+ shape_inference::inferReshapeShape(shape.data(), shape.size(), input.shape().num_elements());
+
+ if (new_shape != output.shape())
+ {
+ // change on output shape
+ output.info().shape(new_shape);
+ }
+ }
+ else
+ {
+ throw std::runtime_error("Reshape: new shape is missing");
+ }
+}
+
+void StaticShapeInferer::visit(const ir::operation::ResizeBilinear &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
+ const auto &input = operands.at(input_idx);
+
+ // get mutable output operand
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ int32_t height_out, width_out;
+ if (op.getInputs().size() == 2)
+ {
+ auto &size = operands.at(op.getInputs().at(ir::operation::ResizeBilinear::Input::SIZE));
+ if (!size.isConstant())
+ {
+ output.info().setDynamic();
+ return;
+ }
+ const auto size_v = size.asVector<std::int32_t>();
+ height_out = size_v[0];
+ width_out = size_v[1];
+ }
+ else
+ {
+ height_out = op.param().height_out;
+ width_out = op.param().width_out;
+ }
+
+ // Shape inferencing logic based on Params
+ ir::Shape new_shape =
+ shape_inference::inferResizeBilinearShape(input.shape(), height_out, width_out);
+
+  // if the size input comes from a constant, TFLC already puts the output shape into the tensor
+ if (new_shape != output.shape())
+ {
+ // change on output shape
+ output.info().shape(new_shape);
+ }
+}
+
+void StaticShapeInferer::visit(const ir::operation::Reverse &op)
+{
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Reverse::Input::INPUT));
+}
+
+void StaticShapeInferer::visit(const ir::operation::Select &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_cond_idx{op.getInputs().at(ir::operation::Select::Input::CONDITION)};
+ const auto &input_cond = operands.at(input_cond_idx);
+
+ const auto input_true_idx{op.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
+ const auto &input_true = operands.at(input_true_idx);
+
+ const auto input_false_idx{op.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};
+ const auto &input_false = operands.at(input_false_idx);
+
+ auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+  // Select output shape
+ ir::Shape new_shape = shape_inference::inferSelectShape(
+ input_cond.info().shape(), input_true.info().shape(), input_false.info().shape());
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::Shape &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(0)};
+ const auto &input = operands.at(input_idx);
+
+ // get mutable output operand
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ // re-sizing output shape
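+  // Shape outputs a 1-D tensor whose single dimension is the rank of the input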
+ ir::Shape output_shape;
+ output_shape.append(input.info().shape().rank());
+
+ output.info().shape(output_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::Slice &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_index{op.getInputs().at(ir::operation::Slice::Input::INPUT)};
+ const auto &input = operands.at(input_index);
+ const auto begins_index{op.getInputs().at(ir::operation::Slice::Input::BEGINS)};
+ const auto &begins = operands.at(begins_index);
+ const auto sizes_index{op.getInputs().at(ir::operation::Slice::Input::SIZES)};
+ const auto &sizes = operands.at(sizes_index);
+ const auto output_index = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_index);
+
+ // Whether input is constant or not does not affect whether output is dynamic or not
+ if (!(begins.isConstant() && sizes.isConstant()))
+ {
+ output.info().setDynamic();
+ return;
+ }
+
+ auto begins_buf = begins.data()->base();
+ auto sizes_buf = sizes.data()->base();
+
+ const auto begins_type = begins.typeInfo().type();
+ assert(begins_type == ir::DataType::INT32 || begins_type == ir::DataType::INT64);
+ assert(begins_type == sizes.typeInfo().type());
+
+ ir::Shape new_shape =
+ (begins_type == ir::DataType::INT32)
+ ? shape_inference::inferSliceShape<int32_t>(input.info().shape(),
+ reinterpret_cast<const int32_t *>(begins_buf),
+ reinterpret_cast<const int32_t *>(sizes_buf))
+ : shape_inference::inferSliceShape<int64_t>(input.info().shape(),
+ reinterpret_cast<const int64_t *>(begins_buf),
+ reinterpret_cast<const int64_t *>(sizes_buf));
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::Softmax &op)
+{
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Softmax::Input::INPUT));
+}
+
+void StaticShapeInferer::visit(const ir::operation::SpaceToBatchND &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto output_index = op.getOutputs().at(0);
+ const auto input_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
+ const auto &block_shape_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
+ const auto &padding_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
+
+ ir::Operand &output = operands.at(output_index);
+ const auto &input = operands.at(input_idx);
+ const auto &block_shape = operands.at(block_shape_idx);
+ const auto &padding = operands.at(padding_idx);
+
+ // Whether input is constant or not does not affect whether output is dynamic or not
+ if (!(block_shape.isConstant() && padding.isConstant()))
+ {
+ output.info().setDynamic();
+ return;
+ }
+
+ const auto &input_shape = input.info().shape();
+ const auto &block_shape_shape = block_shape.info().shape();
+ const auto &padding_shape = padding.info().shape();
+
+ auto block_shape_data = reinterpret_cast<const int32_t *>(block_shape.data()->base());
+ auto padding_data = reinterpret_cast<const int32_t *>(padding.data()->base());
+
+ ir::Shape new_shape = shape_inference::inferSpaceToBatchNDShape(
+ input_shape, block_shape_shape, padding_shape, block_shape_data, padding_data);
+
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::Split &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(ir::operation::Split::Input::INPUT)};
+ const auto &input = operands.at(input_idx);
+
+ const auto axis_idx{op.getInputs().at(ir::operation::Split::Input::AXIS)};
+ const auto &axis = operands.at(axis_idx);
+
+ auto outputs = op.getOutputs();
+ if (!axis.isConstant())
+ {
+ for (auto &&output_idx : outputs)
+ {
+ ir::Operand &output = operands.at(output_idx);
+ output.info().setDynamic();
+ }
+ return;
+ }
+
+ const auto num_splits = op.param().num_splits;
+
+ const auto rank = input.info().shape().rank();
+ auto axis_value = axis.asScalar<int32_t>();
+ axis_value = axis_value < 0 ? axis_value + rank : axis_value;
+
+ assert(0 <= axis_value && axis_value < rank);
+
+ ir::Shape new_shape =
+ shape_inference::inferSplitShape(input.info().shape(), axis_value, num_splits);
+ for (auto &&output_idx : outputs)
+ {
+ ir::Operand &output = operands.at(output_idx);
+ output.info().shape(new_shape);
+ }
+}
+
+void StaticShapeInferer::visit(const ir::operation::SquaredDifference &op)
+{
+ handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::SquaredDifference::Input::LHS),
+ op.getInputs().at(ir::operation::SquaredDifference::Input::RHS));
+}
+
+void StaticShapeInferer::visit(const ir::operation::Squeeze &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
+ const auto &input = operands.at(input_idx);
+
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+  // Squeeze output shape
+ ir::Shape new_shape = shape_inference::inferSqueezeShape(input.info().shape(), op.param());
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::StridedSlice &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_index{op.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
+ const auto &input = operands.at(input_index);
+ const auto starts_index{op.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
+ const auto &starts = operands.at(starts_index);
+ const auto ends_index{op.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
+ const auto &ends = operands.at(ends_index);
+ const auto strides_index{op.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
+ const auto &strides = operands.at(strides_index);
+ const auto output_index = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_index);
+
+ if (!(starts.isConstant() && ends.isConstant() && strides.isConstant()))
+ {
+ output.info().setDynamic();
+ return;
+ }
+
+ const auto begin_mask = op.param().begin_mask;
+ const auto end_mask = op.param().end_mask;
+ const auto shrink_axis_mask = op.param().shrink_axis_mask;
+ const auto rank = input.info().shape().rank();
+
+ auto starts_buf = reinterpret_cast<const uint32_t *>(starts.data()->base());
+ auto ends_buf = reinterpret_cast<const uint32_t *>(ends.data()->base());
+ auto strides_buf = reinterpret_cast<const uint32_t *>(strides.data()->base());
+
+ auto op_params = shape_inference::buildStridedSliceParams(
+ starts_buf, ends_buf, strides_buf, begin_mask, end_mask, shrink_axis_mask, rank);
+
+ ir::Shape new_shape =
+ shape_inference::inferStridedSliceShape(input.info().shape(), op_params, rank);
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::Tile &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(ir::operation::Tile::Input::INPUT)};
+ const auto &input = operands.at(input_idx);
+
+ const auto multiplier_idx{op.getInputs().at(ir::operation::Tile::Input::MULTIPLES)};
+ const auto &multiplier = operands.at(multiplier_idx);
+
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ if (!multiplier.isConstant())
+ {
+ output.info().setDynamic();
+ return;
+ }
+
+ auto multiplier_buffer = reinterpret_cast<const int32_t *>(multiplier.data()->base());
+ assert(multiplier_buffer);
+
+ // re-sizing output shape
+ auto new_shape = shape_inference::inferTileShape(input.info().shape(), multiplier_buffer,
+ multiplier.shape().num_elements());
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::Transpose &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(ir::operation::Transpose::Input::INPUT)};
+ const auto &input = operands.at(input_idx);
+
+ const auto perm_idx{op.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};
+ const auto &perm = operands.at(perm_idx);
+
+  // perm.shape() == ir::Shape{0} means that perm is omitted and defaults to (n-1...0)
+ // TODO This condition changes to perm.num_elements() == 0
+ const auto is_regular_transpose = perm.shape() == ir::Shape{0};
+
+ // get mutable output operand
+ const auto output_idx = op.getOutputs().at(0);
+ auto &output = operands.at(output_idx);
+ if (!perm.isConstant() && !is_regular_transpose)
+ {
+ output.info().setDynamic();
+ return;
+ }
+
+ ir::Shape new_shape;
+ if (is_regular_transpose)
+ {
+    // Perm was omitted, so infer with the default reverse permutation (n-1...0)
+ new_shape = shape_inference::inferTransposeShape(input.info().shape(), nullptr, 0);
+ }
+ else
+ {
+ // Check rank
+ if (input.info().shape().rank() != static_cast<int>(perm.info().shape().num_elements()))
+ {
+ throw std::runtime_error("StaticShapeInferer failed, bad rank size: " +
+ std::to_string(perm.info().shape().num_elements()));
+ }
+
+ // set output shape, based on input and params
+ const auto perm_buf = reinterpret_cast<const int32_t *>(perm.data()->base());
+ new_shape = shape_inference::inferTransposeShape(input.info().shape(), perm_buf,
+ perm.shape().num_elements());
+ }
+ output.info().shape(new_shape);
+}
+
+void StaticShapeInferer::visit(const ir::operation::Unpack &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ const auto input_idx{op.getInputs().at(0)};
+ const auto &input = operands.at(input_idx);
+ const auto num = op.param().num;
+ const auto rank = input.shape().rank();
+ const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
+
+ assert(axis < rank);
+ if (axis < 0)
+ {
+ for (int out_tensor_idx = 0; out_tensor_idx < num; out_tensor_idx++)
+ {
+ const auto output_idx = op.getOutputs().at(out_tensor_idx);
+ ir::Operand &output = operands.at(output_idx);
+ output.info().setDynamic();
+ }
+ return;
+ }
+
+ ir::Shape new_shape = shape_inference::inferUnpackShape(input.info().shape(), axis, rank);
+
+ // re-sizing output shape
+ for (int out_tensor_idx = 0; out_tensor_idx < num; out_tensor_idx++)
+ {
+ const auto output_idx = op.getOutputs().at(out_tensor_idx);
+ ir::Operand &output = operands.at(output_idx);
+ output.info().shape(new_shape);
+ }
+}
+
+void StaticShapeInferer::visit(const ir::operation::While &op)
+{
+ auto body_input_observer = _subg_input_observers.at(op.param().body_subg_index).get();
+ auto cond_input_observer = _subg_input_observers.at(op.param().cond_subg_index).get();
+ // re-sizing input shapes of body subgraph
+ const auto &inputs = op.getInputs();
+ std::vector<ir::OperandInfo> inputs_info;
+ const auto &graph = _lowered_subg->graph();
+ for (size_t i = 0; i < inputs.size(); ++i)
+ {
+ const auto &operand_info = graph.operands().at(inputs.at(i)).info();
+ inputs_info.emplace_back(operand_info);
+ }
+
+ body_input_observer->updateShapes(inputs_info);
+ _child_inferers.at(op.param().body_subg_index)->infer();
+
+  // Check whether the while operation's shapes are predictable
+  // This while op's outputs were also updated by the call above,
+  // "_child_inferers.at(op.param().body_subg_index)->infer()". That means the body's outputs and
+  // this op's outputs must have the same shape. So we can predict whether the body subgraph will
+  // change at every step by comparing the shapes of inputs and outputs. If any shape of the body
+  // outputs differs from the corresponding input, non-constant operands will be set to dynamic.
+ bool check_unpredictable_dynamic = false;
+ const auto &updated_outputs = op.getOutputs();
+ assert(inputs_info.size() == updated_outputs.size());
+ for (size_t i = 0; i < updated_outputs.size(); ++i)
+ {
+ const auto &input_info = inputs_info.at(i);
+ const auto &output_info = graph.operands().at(updated_outputs.at(i)).info();
+ if (input_info.isDynamic() != output_info.isDynamic() ||
+ input_info.shape() != output_info.shape())
+ {
+ check_unpredictable_dynamic = true;
+ break;
+ }
+ }
+
+ if (check_unpredictable_dynamic)
+ {
+ body_input_observer->updateShapes(inputs_info, check_unpredictable_dynamic);
+ _child_inferers.at(op.param().body_subg_index)->infer();
+ }
+ cond_input_observer->updateShapes(inputs_info, check_unpredictable_dynamic);
+ _child_inferers.at(op.param().cond_subg_index)->infer();
+}
+
+void StaticShapeInferer::visit(const ir::operation::DetectionPostProcess &op)
+{
+ // TODO: NMS supports very limited input/output size.
+ ir::operation::DetectionPostProcess::Param param = op.param();
+
+ auto &operands = _lowered_subg->graph().operands();
+ const int num_detected_boxes = param.max_detections * param.max_classes_per_detection;
+
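+  // All output shapes are fixed: [1, num_detected_boxes, 4], [1, num_detected_boxes] x 2, and [1]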
+ const auto output_idx1 = op.getOutputs().at(0);
+ auto &output1 = operands.at(output_idx1);
+ output1.info().shape({1, num_detected_boxes, 4});
+
+ const auto output_idx2 = op.getOutputs().at(1);
+ auto &output2 = operands.at(output_idx2);
+ output2.info().shape({1, num_detected_boxes});
+
+ const auto output_idx3 = op.getOutputs().at(2);
+ auto &output3 = operands.at(output_idx3);
+ output3.info().shape({1, num_detected_boxes});
+
+ const auto output_idx4 = op.getOutputs().at(3);
+ auto &output4 = operands.at(output_idx4);
+ output4.info().shape({1});
+}
+void StaticShapeInferer::visit(const ir::operation::Bulk &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ // TODO: support multiple inputs/outputs
+ const auto input_idx{op.getInputs().at(0)};
+ const auto &input = operands.at(input_idx);
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ const auto &cur_input_shape = input.info().shape();
+ auto origin_output_shape = op.param().origin_output_shapes[0];
+
+ // TODO: more check for valid batch request
+ if ((cur_input_shape.dim(0) < origin_output_shape.dim(0)) ||
+ (cur_input_shape.dim(0) % origin_output_shape.dim(0) != 0))
+ {
+ throw std::runtime_error("StaticShapeInferer " + op.name() + ": Not supported batch size");
+ }
+ size_t batch_multiplier = cur_input_shape.dim(0) / origin_output_shape.dim(0);
+
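+  // Scale only the batch dimension; the remaining dimensions keep the original output shape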
+ ir::Shape new_shape;
+ new_shape.append(origin_output_shape.dim(0) * batch_multiplier);
+ for (int32_t d = 1; d < origin_output_shape.rank(); ++d)
+ new_shape.append(origin_output_shape.dim(d));
+
+ output.info().shape(new_shape);
+}
+
+} // namespace compiler
+
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/TensorBuilders.h b/runtime/onert/core/src/compiler/TensorBuilders.h
deleted file mode 100644
index 3b0360b4b..000000000
--- a/runtime/onert/core/src/compiler/TensorBuilders.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_COMPILER_TENSOR_BUILDERS_H__
-#define __ONERT_COMPILER_TENSOR_BUILDERS_H__
-
-#include <unordered_set>
-#include <memory>
-#include "backend/BackendContext.h"
-#include "backend/Backend.h"
-#include "backend/controlflow/Config.h"
-#include "backend/controlflow/TensorBuilder.h"
-#include "util/logging.h"
-
-namespace onert
-{
-namespace compiler
-{
-
-class TensorBuilders
-{
-public:
- TensorBuilders() = default;
-
- TensorBuilders(const onert::backend::BackendContexts &backend_contexts, bool include_controlflow)
- {
- for (const auto &e : backend_contexts)
- {
- if (e.first->config()->id() == backend::controlflow::Config::ID)
- {
- _cf_tensor_builder = std::dynamic_pointer_cast<backend::controlflow::TensorBuilder>(
- e.second->tensor_builder);
- if (include_controlflow)
- _tensor_builders.insert(e.second->tensor_builder);
- }
- else
- {
- _tensor_builders.insert(e.second->tensor_builder);
- }
- }
- }
-
- std::unordered_set<std::shared_ptr<onert::backend::ITensorBuilder>>::const_iterator begin() const
- {
- return _tensor_builders.cbegin();
- }
- std::unordered_set<std::shared_ptr<onert::backend::ITensorBuilder>>::const_iterator end() const
- {
- return _tensor_builders.cend();
- }
-
- std::shared_ptr<backend::controlflow::TensorBuilder> getControlflowTensorBuilder() const
- {
- return _cf_tensor_builder;
- }
-
-private:
- std::unordered_set<std::shared_ptr<backend::ITensorBuilder>> _tensor_builders;
- std::shared_ptr<backend::controlflow::TensorBuilder> _cf_tensor_builder;
-};
-
-} // namespace compiler
-} // namespace onert
-
-#endif // __ONERT_COMPILER_TENSOR_BUILDERS_H__
diff --git a/runtime/onert/core/src/compiler/TensorRegistries.h b/runtime/onert/core/src/compiler/TensorRegistries.h
index 8be87b081..c7e06e84c 100644
--- a/runtime/onert/core/src/compiler/TensorRegistries.h
+++ b/runtime/onert/core/src/compiler/TensorRegistries.h
@@ -17,13 +17,14 @@
#ifndef __ONERT_COMPILER_TENSOR_REGISTRIES_H__
#define __ONERT_COMPILER_TENSOR_REGISTRIES_H__
-#include <unordered_set>
-#include <memory>
-#include "backend/BackendContext.h"
+#include "../backend/builtin/Config.h"
+#include "../backend/builtin/TensorRegistry.h"
+
#include "backend/Backend.h"
-#include "backend/controlflow/Config.h"
-#include "backend/controlflow/TensorBuilder.h"
-#include "backend/controlflow/TensorRegistry.h"
+#include "backend/BackendContext.h"
+
+#include <memory>
+#include <unordered_set>
namespace onert
{
@@ -35,17 +36,16 @@ class TensorRegistries
public:
TensorRegistries() = default;
- TensorRegistries(const onert::backend::BackendContexts &backend_contexts,
- bool include_controlflow)
+ TensorRegistries(const onert::backend::BackendContexts &backend_contexts, bool include_builtin)
{
for (const auto &e : backend_contexts)
{
auto tensor_reg = e.second->tensor_registry;
- if (e.first->config()->id() == backend::controlflow::Config::ID)
+ if (e.first->config()->id() == backend::builtin::Config::ID)
{
- _cf_tensor_reg =
- std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(tensor_reg);
- if (include_controlflow)
+ _builtin_tensor_reg =
+ std::dynamic_pointer_cast<backend::builtin::TensorRegistry>(tensor_reg);
+ if (include_builtin)
_tensor_regs.insert(tensor_reg);
}
else
@@ -64,14 +64,14 @@ public:
return _tensor_regs.cend();
}
- std::shared_ptr<backend::controlflow::TensorRegistry> getControlflowTensorRegistry() const
+ std::shared_ptr<backend::builtin::TensorRegistry> getBuiltinTensorRegistry() const
{
- return _cf_tensor_reg;
+ return _builtin_tensor_reg;
}
- std::shared_ptr<backend::ITensor> getITensor(ir::OperandIndex ind) const
+ backend::ITensor *getITensor(ir::OperandIndex ind) const
{
- for (auto &tensor_reg : _tensor_regs)
+ for (auto &&tensor_reg : _tensor_regs)
{
auto tensor = tensor_reg->getITensor(ind);
if (tensor)
@@ -82,7 +82,7 @@ public:
private:
std::unordered_set<std::shared_ptr<backend::ITensorRegistry>> _tensor_regs;
- std::shared_ptr<backend::controlflow::TensorRegistry> _cf_tensor_reg;
+ std::shared_ptr<backend::builtin::TensorRegistry> _builtin_tensor_reg;
};
} // namespace compiler
diff --git a/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.cc b/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.cc
index 647669e46..a6590b13f 100644
--- a/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.cc
+++ b/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.cc
@@ -17,8 +17,9 @@
#include "ConstantInsertionPass.h"
#include "backend/Backend.h"
-#include <ir/Graph.h>
-#include <util/Utils.h>
+#include "ir/Graph.h"
+#include "util/Utils.h"
+#include "util/logging.h"
namespace onert
{
@@ -27,15 +28,14 @@ namespace compiler
namespace pass
{
-void ConstantInsertionPass::callback(const ir::OperationIndex &node_index, ir::Operation &node)
+void ConstantInsertionPass::callback(const ir::OperationIndex &node_index, ir::IOperation &node)
{
- const auto &op_sequence_index = _lowered_graph.op_seqs().getOperation(node_index);
- const auto op_seq_lower_info = _lowered_graph.getLowerInfo(op_sequence_index);
- const auto backend = op_seq_lower_info->backend();
- const auto layout = op_seq_lower_info->layout();
- const auto factor = ir::operand::PermuteFactor{backend, layout};
+ const auto op_lower_info = _lowered_graph.lower_info().operation.getRawPtr(node_index);
+ const auto backend = op_lower_info->backend();
+ const auto layout = op_lower_info->layout();
+ const auto factor = PermuteFactor{backend, layout};
- for (const auto input : node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
+ for (const auto &input : node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
auto &object = _graph.operands().at(input);
@@ -44,22 +44,13 @@ void ConstantInsertionPass::callback(const ir::OperationIndex &node_index, ir::O
const auto key = ReplaceKey{input, factor};
if (_replace_operands_map.count(key) == 0)
{
- auto new_object = object;
- new_object.unsetDef();
- // TODO Remove const_case
- const_cast<ir::OperationIndexSet &>(new_object.getUses()).clear();
+ ir::Operand new_object(object);
+ new_object.clearDefUse();
const auto new_index = _graph.operands().emplace(new_object);
_replace_operands_map[key] = new_index;
}
const auto replaced_input = _replace_operands_map[key];
- // Update op_seq
- if (_lowered_graph.op_seqs().at(op_sequence_index).getInputs().contains(input))
- {
- // All inputs of op_seq have the same PermuteFactor because those inputs are inputs of first
- // operation
- _lowered_graph.op_seqs().at(op_sequence_index).replaceInputs(input, replaced_input);
- }
// Update the same inputs of a node at once because inputs of an operation have the same
// PermuteFactor
@@ -69,6 +60,8 @@ void ConstantInsertionPass::callback(const ir::OperationIndex &node_index, ir::O
auto &replaced_object = _graph.operands().at(replaced_input);
replaced_object.insertUse(node_index);
+ VERBOSE(ConstInsertPass) << "New operand " << replaced_input << " added(copy of " << input
+ << ") for " << factor << std::endl;
// Remove this node from uses of origin operand
// Constant operand has no def.
assert(!object.getDef().valid());
@@ -76,12 +69,16 @@ void ConstantInsertionPass::callback(const ir::OperationIndex &node_index, ir::O
// Remove origin operand
if (object.getUses().size() == 0)
+ {
_graph.removeOperand(input);
+ VERBOSE(ConstInsertPass) << "Original operand " << input << " removed - no uses"
+ << std::endl;
+ }
}
}
// Now this runtime does not support the node making output as constant
- for (const auto &output : node.getOutputs())
+ for (const auto &output : node.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
UNUSED_RELEASE(output);
assert(!_graph.operands().at(output).isConstant());
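The reworked pass no longer touches op_seqs: it clones a constant operand once per (operand, PermuteFactor) pair and redirects later uses with the same factor to the clone. A simplified sketch of that bookkeeping, with plain `int` stand-ins for `ir::OperandIndex` and `PermuteFactor` (names and types here are illustrative only):

```
#include <cstddef>
#include <functional>
#include <unordered_map>

// Stand-in for ConstantInsertionPass::ReplaceKey: one clone per (operand, factor).
struct KeySketch
{
  int operand; // stands in for ir::OperandIndex
  int factor;  // stands in for PermuteFactor (backend + layout)
  bool operator==(const KeySketch &o) const { return operand == o.operand && factor == o.factor; }
};

struct KeySketchHash
{
  std::size_t operator()(const KeySketch &k) const noexcept
  {
    // Same XOR-with-shifted-hash combine pattern as the real ReplaceKey hasher.
    return std::hash<int>()(k.operand) ^ (std::hash<int>()(k.factor) << 1);
  }
};

// Returns the operand index to use for `key`. The first use with a given factor
// records a fresh clone (`next_clone_index`); later uses reuse the recorded clone.
int operand_for(std::unordered_map<KeySketch, int, KeySketchHash> &replace_map,
                const KeySketch &key, int next_clone_index)
{
  auto it = replace_map.find(key);
  if (it != replace_map.end())
    return it->second;
  replace_map.emplace(key, next_clone_index);
  return next_clone_index;
}
```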
diff --git a/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.h b/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.h
index 052883c92..d5b9aa14e 100644
--- a/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.h
+++ b/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.h
@@ -17,7 +17,7 @@
#ifndef __ONERT_COMPILER_PASS_CONSTANT_INSERTION_PASS_H__
#define __ONERT_COMPILER_PASS_CONSTANT_INSERTION_PASS_H__
-#include <ir/operand/PermuteFactor.h>
+#include <compiler/PermuteFactor.h>
#include <ir/Index.h>
#include "LoweredOperationPass.h"
#include <unordered_map>
@@ -39,13 +39,13 @@ public:
std::string id() final { return "ConstantInsertionPass"; }
public:
- void callback(const ir::OperationIndex &index, ir::Operation &node) final;
+ void callback(const ir::OperationIndex &index, ir::IOperation &node) final;
private:
struct ReplaceKey
{
ir::OperandIndex index;
- ir::operand::PermuteFactor factor;
+ PermuteFactor factor;
bool operator==(const ReplaceKey &other) const
{
@@ -61,8 +61,7 @@ private:
std::size_t operator()(const ReplaceKey &key) const noexcept
{
using std::hash;
- return hash<ir::OperandIndex>()(key.index) ^
- (hash<ir::operand::PermuteFactor>()(key.factor) << 1);
+ return hash<ir::OperandIndex>()(key.index) ^ (hash<PermuteFactor>()(key.factor) << 1);
}
};
diff --git a/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.cc b/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.cc
index 1c1dbe0ee..32e32d0ef 100644
--- a/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.cc
+++ b/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.cc
@@ -18,8 +18,9 @@
#include "backend/Backend.h"
#include <ir/Graph.h>
-#include <ir/operand/PermuteFactor.h>
+#include <compiler/PermuteFactor.h>
#include <util/Utils.h>
+#include "util/logging.h"
namespace onert
{
@@ -28,25 +29,25 @@ namespace compiler
namespace pass
{
-void ConstantLoweringPass::callback(const ir::OperationIndex &node_index, ir::Operation &node)
+void ConstantLoweringPass::callback(const ir::OperationIndex &node_index, ir::IOperation &node)
{
- const auto &op_sequence_index = _lowered_graph.op_seqs().getOperation(node_index);
- const auto op_seq_lower_info = _lowered_graph.getLowerInfo(op_sequence_index);
- const auto backend = op_seq_lower_info->backend();
- const auto layout = op_seq_lower_info->layout();
- const auto factor = ir::operand::PermuteFactor{backend, layout};
+ const auto op_lower_info = _lowered_graph.lower_info().operation.getRawPtr(node_index);
+ const auto backend = op_lower_info->backend();
+ const auto layout = op_lower_info->layout();
+ const auto factor = PermuteFactor{backend, layout};
// Now this runtime does not support the node making output of operation as constant
- for (const auto input : node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
+ for (const auto &input : node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
auto &object = _graph.operands().at(input);
if (object.isConstant())
{
      // All constant operands are already assigned at each backend by ConstantInsertionPass. So a
// constant has `def` and `use` as the same PermuteFactor
- _lowered_graph.setLowerInfo(input, std::make_unique<ir::operand::LowerInfo>());
- _lowered_graph.getLowerInfo(input)->addDefPermuteFactor(factor);
- _lowered_graph.getLowerInfo(input)->addUsePermuteFactor(factor);
+ auto operand_li = std::make_unique<compiler::OperandLowerInfo>();
+ operand_li->addDefPermuteFactor(factor);
+ operand_li->addUsePermuteFactor(factor);
+ _lowered_graph.lower_info().operand.set(input, std::move(operand_li));
}
}
}
diff --git a/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.h b/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.h
index e17d776d1..d60a1033f 100644
--- a/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.h
+++ b/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.h
@@ -36,7 +36,7 @@ public:
std::string id() final { return "ConstantLoweringPass"; }
public:
- void callback(const ir::OperationIndex &index, ir::Operation &node) final;
+ void callback(const ir::OperationIndex &index, ir::IOperation &node) final;
};
} // namespace pass
diff --git a/runtime/onert/core/src/compiler/pass/ConstantOutputPass.cc b/runtime/onert/core/src/compiler/pass/ConstantOutputPass.cc
new file mode 100644
index 000000000..1448de473
--- /dev/null
+++ b/runtime/onert/core/src/compiler/pass/ConstantOutputPass.cc
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConstantOutputPass.h"
+
+#include "ir/Graph.h"
+#include "ir/operation/Permute.h"
+#include "util/logging.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace pass
+{
+
+void ConstantOutputPass::callback(const ir::OperandIndex &ind, ir::Operand &obj)
+{
+ if (!_graph.getOutputs().contains(ind) || !obj.isConstant())
+ return;
+
+ auto permute_input_ind = _graph.addOperand(obj.shape(), obj.typeInfo());
+ auto &permute_input_obj = _graph.operands().at(permute_input_ind);
+
+ // Move the const data
+ permute_input_obj.data(obj.shareData());
+ obj.releaseData();
+ obj.info().setAsNonConst();
+
+ using ir::operation::Permute;
+ auto permute_obj = std::make_unique<Permute>(permute_input_ind, ind, Permute::Type::COPY);
+ auto permute_ind = _graph.operations().push(std::move(permute_obj));
+
+ permute_input_obj.insertUse(permute_ind);
+ obj.setDef(permute_ind);
+
+ // Make the operations that uses this operand to use the generated operand
+ auto orig_uses = obj.getUses();
+ for (auto &&use : orig_uses)
+ {
+ permute_input_obj.insertUse(use);
+ obj.removeUse(use);
+ _graph.operations().at(use).replaceInputs(ind, permute_input_ind);
+ }
+
+  VERBOSE(ConstantOutputPass) << "Permute Op inserted for a constant output, node index : "
+ << permute_ind << std::endl;
+ VERBOSE(ConstantOutputPass) << " - Input (inserted) Operand : " << permute_input_ind
+ << std::endl;
+ VERBOSE(ConstantOutputPass) << " - Output(original) Operand : " << ind << std::endl;
+}
+
+} // namespace pass
+} // namespace compiler
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/pass/ConstantOutputPass.h b/runtime/onert/core/src/compiler/pass/ConstantOutputPass.h
new file mode 100644
index 000000000..193dd3a68
--- /dev/null
+++ b/runtime/onert/core/src/compiler/pass/ConstantOutputPass.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_PASS_CONSTANT_OUTPUT_PASS_H__
+#define __ONERT_COMPILER_PASS_CONSTANT_OUTPUT_PASS_H__
+
+#include "OperandPass.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace pass
+{
+
+/**
+ * @brief Pass to specially handle constant model outputs
+ *
+ * As an output buffer is given right before execution while constant initialization is done at
+ * the prepare phase, the current runtime structure cannot handle a constant model output.
+ * To resolve this, this pass inserts a Permute layer with a const input and makes the model
+ * output tensor its output.
+ *
+ * e.g.)
+ *
+ * ((Const Output))
+ *
+ * becomes
+ *
+ * (Const) -> [Permute] -> ((Output))
+ *
+ * Note that this is a mandatory pass for Graph.
+ */
+class ConstantOutputPass : public OperandPass
+{
+public:
+ using OperandPass::OperandPass;
+
+public:
+ std::string id() final { return "ConstantOutputPass"; }
+
+public:
+ void callback(const ir::OperandIndex &i, ir::Operand &o) final;
+};
+
+} // namespace pass
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_PASS_CONSTANT_OUTPUT_PASS_H__
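A usage sketch for the new pass (the wrapper function is illustrative; the pass headers live under the project-internal `src/compiler/pass` directory):

```
// Sketch: ConstantOutputPass is an OperandPass over ir::Graph, so it can be run
// directly or composed through PassRunner (introduced later in this change set).
void run_constant_output_pass(onert::ir::Graph &graph)
{
  onert::compiler::pass::ConstantOutputPass{graph}.run();
  // Afterwards every constant model output is fed by a Permute(COPY) node whose
  // input is a new internal constant operand, and the output operand itself is
  // no longer marked constant.
}
```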
diff --git a/runtime/onert/core/src/compiler/pass/IPass.h b/runtime/onert/core/src/compiler/pass/IPass.h
new file mode 100644
index 000000000..77f5916fd
--- /dev/null
+++ b/runtime/onert/core/src/compiler/pass/IPass.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_PASS_IPASS_H__
+#define __ONERT_COMPILER_PASS_IPASS_H__
+
+#include <string>
+
+namespace onert
+{
+namespace compiler
+{
+namespace pass
+{
+
+struct IPass
+{
+ virtual ~IPass() = default;
+
+ virtual std::string id() = 0;
+ virtual void run() = 0;
+};
+
+} // namespace pass
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_PASS_IPASS_H__
diff --git a/runtime/onert/core/src/compiler/pass/LoweredOperandPass.h b/runtime/onert/core/src/compiler/pass/LoweredOperandPass.h
index 0c5f7d745..64831a0ac 100644
--- a/runtime/onert/core/src/compiler/pass/LoweredOperandPass.h
+++ b/runtime/onert/core/src/compiler/pass/LoweredOperandPass.h
@@ -18,7 +18,7 @@
#define __ONERT_IR_PASS_LOWERED_OPERAND_PASS_H__
#include "OperandPass.h"
-#include "compiler/LoweredGraph.h"
+#include "compiler/ILoweredGraph.h"
namespace onert
{
@@ -30,8 +30,8 @@ namespace pass
class LoweredOperandPass : public OperandPass
{
public:
- LoweredOperandPass(compiler::LoweredGraph &lowered_graph)
- : OperandPass{lowered_graph.graph()}, _lowered_graph{lowered_graph}
+ LoweredOperandPass(compiler::ILoweredGraph &lowered_graph)
+ : OperandPass{lowered_graph.graph()}, _lowered_graph{lowered_graph}
{
// DO NOTHING
}
@@ -42,7 +42,7 @@ public:
void callback(const ir::OperandIndex &i, ir::Operand &o) override = 0;
protected:
- compiler::LoweredGraph &_lowered_graph;
+ compiler::ILoweredGraph &_lowered_graph;
};
} // namespace pass
diff --git a/runtime/onert/core/src/compiler/pass/LoweredOperationPass.h b/runtime/onert/core/src/compiler/pass/LoweredOperationPass.h
index 5c8569be2..27ca77c91 100644
--- a/runtime/onert/core/src/compiler/pass/LoweredOperationPass.h
+++ b/runtime/onert/core/src/compiler/pass/LoweredOperationPass.h
@@ -18,7 +18,7 @@
#define __ONERT_IR_PASS_LOWERED_OPERATION_PASS_H__
#include "OperationPass.h"
-#include "compiler/LoweredGraph.h"
+#include "compiler/ILoweredGraph.h"
namespace onert
{
@@ -30,8 +30,8 @@ namespace pass
class LoweredOperationPass : public OperationPass
{
public:
- LoweredOperationPass(LoweredGraph &lowered_graph)
- : OperationPass{lowered_graph.graph()}, _lowered_graph{lowered_graph}
+ LoweredOperationPass(ILoweredGraph &lowered_graph)
+ : OperationPass{lowered_graph.graph()}, _lowered_graph{lowered_graph}
{
// DO NOTHING
}
@@ -39,10 +39,10 @@ public:
virtual ~LoweredOperationPass() = default;
std::string id() override = 0;
- void callback(const ir::OperationIndex &i, ir::Operation &o) override = 0;
+ void callback(const ir::OperationIndex &i, ir::IOperation &o) override = 0;
protected:
- LoweredGraph &_lowered_graph;
+ ILoweredGraph &_lowered_graph;
};
} // namespace pass
diff --git a/runtime/onert/core/src/compiler/pass/OddOutputPass.cc b/runtime/onert/core/src/compiler/pass/OddOutputPass.cc
new file mode 100644
index 000000000..e2b3f6111
--- /dev/null
+++ b/runtime/onert/core/src/compiler/pass/OddOutputPass.cc
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OddOutputPass.h"
+
+#include "ir/Graph.h"
+#include "ir/operation/Permute.h"
+#include "util/logging.h"
+#include "util/Utils.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace pass
+{
+
+void OddOutputPass::run()
+{
+ auto &outputs = _graph.getOutputs();
+
+ VERBOSE(OddOutputPass) << "Case 1 : An operand which is a model output and a model input"
+ << std::endl;
+ for (const auto &ind : outputs)
+ {
+ if (_graph.getInputs().contains(ind))
+ {
+ auto permute_output_ind = insertPermute(ind);
+ // Update the output to be newly added operand
+ _graph.getOutputs().replace(ind, permute_output_ind);
+ }
+ }
+
+ VERBOSE(OddOutputPass) << "Case 2 : Two or more duplicated outputs" << std::endl;
+ std::unordered_set<ir::OperandIndex> occurence;
+ for (auto &&ind : outputs)
+ {
+ auto &obj = _graph.operands().at(ind);
+ if (occurence.count(ind) == 0)
+ {
+ occurence.insert(ind);
+ continue;
+ }
+
+    // Panic if it is const; constant outputs must have been handled earlier by another pass
+ UNUSED_RELEASE(obj);
+ assert(!obj.isConstant());
+
+ auto permute_output_ind = insertPermute(ind);
+ ind = permute_output_ind; // Replace output index to fix output duplication
+ }
+}
+
+ir::OperandIndex OddOutputPass::insertPermute(ir::OperandIndex ind)
+{
+ auto &obj = _graph.operands().at(ind);
+ auto output_ind = _graph.addOperand(obj.shape(), obj.typeInfo());
+ auto &output_obj = _graph.operands().at(output_ind);
+
+ using ir::operation::Permute;
+ auto permute_obj = std::make_unique<Permute>(ind, output_ind, Permute::Type::COPY);
+ auto permute_ind = _graph.operations().push(std::move(permute_obj));
+
+ output_obj.setDef(permute_ind);
+ obj.insertUse(permute_ind);
+
+ VERBOSE(OddOutputPass) << "Permute Op inserted for a constant output, node index : "
+ << permute_ind << std::endl;
+ VERBOSE(OddOutputPass) << " - Input (original) Operand : " << ind << std::endl;
+ VERBOSE(OddOutputPass) << " - Output(inserted) Operand : " << output_ind << std::endl;
+
+ return output_ind;
+}
+
+} // namespace pass
+} // namespace compiler
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/pass/OddOutputPass.h b/runtime/onert/core/src/compiler/pass/OddOutputPass.h
new file mode 100644
index 000000000..2accbac60
--- /dev/null
+++ b/runtime/onert/core/src/compiler/pass/OddOutputPass.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_PASS_ODD_OUTPUT_PASS_H__
+#define __ONERT_COMPILER_PASS_ODD_OUTPUT_PASS_H__
+
+#include <unordered_set>
+
+#include "Pass.h"
+#include "ir/Index.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace pass
+{
+
+/**
+ * @brief Pass to specially handle odd outputs in a subgraph
+ *
+ * Runtime Graph IR requires that every input or output have a distinct tensor index; this is
+ * onert's restriction. However, duplicated indices are allowed in models (or via the API), so we
+ * must transform the graph after model loading.
+ *
+ * This is necessary since our API lets users set different buffers for each input and output, so
+ * copying the value at runtime is unavoidable.
+ *
+ * Note that this is a mandatory pass for Graph.
+ *
+ * Case 1 : An operand which is a model output and a model input
+ *
+ * Create an operand and insert a Permute(copy) op between them. And change the output to be the
+ * newly generated operand.
+ *
+ * e.g.)
+ *
+ * ```
+ * ((#0 Input0 and also Output0))
+ * becomes
+ * ((#0 Input0)) -> [#0 Permute] -> ((#1 Output0))
+ * ```
+ *
+ * Case 2 : Two or more duplicated outputs
+ *
+ * Do the same with Case 1, but between two outputs of the same tensor index.
+ *
+ * e.g.)
+ *
+ * ```
+ * ((#0 Input0)) -> [#0 Some Operation] -> ((#1 Output0 and also Output1))
+ * becomes
+ * ((#0 Input0)) -> [#0 Some Operation] -> ((#1 Output0)) [#1 Permute] -> ((#2 Output1))
+ * ```
+ *
+ */
+class OddOutputPass : public Pass
+{
+public:
+ using Pass::Pass;
+
+public:
+ std::string id() final { return "OddOutputPass"; }
+
+public:
+ void run() override;
+
+private:
+ ir::OperandIndex insertPermute(ir::OperandIndex input);
+};
+
+} // namespace pass
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_PASS_ODD_OUTPUT_PASS_H__
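A sketch of Case 1 in terms of the Graph API used by the tests in this change set (shape, type, and the expectation comments are illustrative, not a verified test; the pass header is project-internal):

```
#include "ir/Graph.h"

// Sketch: one operand registered as both a model input and a model output (Case 1).
void odd_output_case1_example()
{
  using namespace onert::ir;
  Graph graph;
  auto ind = graph.addOperand(Shape{1, 2, 2, 1}, TypeInfo{DataType::FLOAT32});
  graph.addInput(ind);
  graph.addOutput(ind); // duplicated role: input and output share one index

  onert::compiler::pass::OddOutputPass{graph}.run();
  // The model output now refers to a newly added operand defined by an inserted
  // Permute(COPY) node that reads the original operand.
}
```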
diff --git a/runtime/onert/core/src/compiler/pass/OperandPass.cc b/runtime/onert/core/src/compiler/pass/OperandPass.cc
index 50c001c30..db8ebedcd 100644
--- a/runtime/onert/core/src/compiler/pass/OperandPass.cc
+++ b/runtime/onert/core/src/compiler/pass/OperandPass.cc
@@ -28,7 +28,7 @@ namespace pass
void OperandPass::run()
{
_graph.operands().iterate(
- [&](const ir::OperandIndex &index, ir::Operand &object) { callback(index, object); });
+ [&](const ir::OperandIndex &index, ir::Operand &object) { callback(index, object); });
}
} // namespace pass
diff --git a/runtime/onert/core/src/compiler/pass/OperationPass.cc b/runtime/onert/core/src/compiler/pass/OperationPass.cc
index d7a55cb22..bd9bcb4a4 100644
--- a/runtime/onert/core/src/compiler/pass/OperationPass.cc
+++ b/runtime/onert/core/src/compiler/pass/OperationPass.cc
@@ -17,7 +17,7 @@
#include "OperationPass.h"
#include "ir/Index.h"
-#include "ir/Operation.h"
+#include "ir/IOperation.h"
#include "ir/Graph.h"
namespace onert
@@ -30,7 +30,7 @@ namespace pass
void OperationPass::run()
{
_graph.operations().iterate(
- [&](const ir::OperationIndex &index, ir::Operation &node) { callback(index, node); });
+ [&](const ir::OperationIndex &index, ir::IOperation &node) { callback(index, node); });
}
} // namespace pass
diff --git a/runtime/onert/core/src/compiler/pass/OperationPass.h b/runtime/onert/core/src/compiler/pass/OperationPass.h
index ac4d818a2..0a00b11d1 100644
--- a/runtime/onert/core/src/compiler/pass/OperationPass.h
+++ b/runtime/onert/core/src/compiler/pass/OperationPass.h
@@ -29,7 +29,7 @@ namespace onert
{
namespace ir
{
-class Operation;
+struct IOperation;
} // namespace ir
} // namespace onert
@@ -62,7 +62,7 @@ public:
* @param index is the index of a node in graph
* @param node is the node in graph
*/
- virtual void callback(const ir::OperationIndex &index, ir::Operation &node) = 0;
+ virtual void callback(const ir::OperationIndex &index, ir::IOperation &node) = 0;
/**
* @brief Run the pass
diff --git a/runtime/onert/core/src/compiler/pass/Pass.h b/runtime/onert/core/src/compiler/pass/Pass.h
index 3f356c337..b34695c97 100644
--- a/runtime/onert/core/src/compiler/pass/Pass.h
+++ b/runtime/onert/core/src/compiler/pass/Pass.h
@@ -17,6 +17,8 @@
#ifndef __ONERT_COMPILER_PASS_PASS_H__
#define __ONERT_COMPILER_PASS_PASS_H__
+#include "IPass.h"
+
#include <string>
namespace onert
@@ -24,7 +26,7 @@ namespace onert
namespace ir
{
class Graph;
-} // namespace compiler
+} // namespace ir
} // namespace onert
namespace onert
@@ -34,7 +36,7 @@ namespace compiler
namespace pass
{
-class Pass
+class Pass : public IPass
{
public:
Pass(ir::Graph &graph) : _graph{graph} {}
diff --git a/runtime/onert/core/src/compiler/pass/PassRunner.cc b/runtime/onert/core/src/compiler/pass/PassRunner.cc
new file mode 100644
index 000000000..cd1b82bb2
--- /dev/null
+++ b/runtime/onert/core/src/compiler/pass/PassRunner.cc
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PassRunner.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace pass
+{
+
+PassRunner &PassRunner::append(std::unique_ptr<IPass> pass)
+{
+ _passes.emplace_back(std::move(pass));
+ return *this;
+}
+
+void PassRunner::run()
+{
+ for (auto &&pass : _passes)
+ {
+ VERBOSE(PassRunner) << "Start running '" << pass->id() << "'" << std::endl;
+ pass->run();
+ VERBOSE(PassRunner) << "Finished running '" << pass->id() << "'" << std::endl;
+ // TODO Dump graph?
+ }
+}
+
+} // namespace pass
+} // namespace compiler
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/pass/PassRunner.h b/runtime/onert/core/src/compiler/pass/PassRunner.h
new file mode 100644
index 000000000..03bfbe220
--- /dev/null
+++ b/runtime/onert/core/src/compiler/pass/PassRunner.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_PASS_PASS_RUNNER_H__
+#define __ONERT_COMPILER_PASS_PASS_RUNNER_H__
+
+#include <initializer_list>
+#include <memory>
+#include <vector>
+
+#include "IPass.h"
+#include "util/logging.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace pass
+{
+
+/**
+ * @brief Composes and runs a sequence of passes with logging
+ */
+class PassRunner
+{
+public:
+ PassRunner() = default;
+ PassRunner &append(std::unique_ptr<IPass> pass);
+
+ void run();
+
+private:
+ std::vector<std::unique_ptr<IPass>> _passes;
+};
+
+} // namespace pass
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_PASS_PASS_RUNNER_H__
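PassRunner owns a vector of `IPass` and runs them in insertion order, logging each one. Since `append()` returns `*this`, calls chain naturally; a usage sketch (this particular pass selection is illustrative, not a prescribed pipeline):

```
#include <memory>

// Sketch: composing graph passes with PassRunner.
void run_graph_passes(onert::ir::Graph &graph)
{
  using namespace onert::compiler::pass;
  PassRunner{}
    .append(std::make_unique<ConstantOutputPass>(graph))
    .append(std::make_unique<OddOutputPass>(graph))
    .append(std::make_unique<UnusedOperandEliminationPass>(graph))
    .run();
}
```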
diff --git a/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc
index f01697034..d9452c7f9 100644
--- a/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc
+++ b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc
@@ -15,8 +15,8 @@
*/
#include "PermutationEliminationPass.h"
-#include "backend/controlflow/Config.h"
+#include "backend/Backend.h"
#include "util/logging.h"
namespace onert
@@ -26,7 +26,7 @@ namespace compiler
namespace pass
{
-void PermutationEliminationPass::callback(const ir::OperationIndex &ind, ir::Operation &node)
+void PermutationEliminationPass::callback(const ir::OperationIndex &ind, ir::IOperation &node)
{
_op_ind = ind;
node.accept(*this);
@@ -39,8 +39,9 @@ void PermutationEliminationPass::visit(const ir::operation::Permute &node)
// Check if two tensors are both portable if not, we can't eliminate the node
{
- auto in_def_factor = _lowered_graph.getLowerInfo(in_operand)->def_factors().getOnlyElement();
- auto out_def_factor = _lowered_graph.getLowerInfo(out_operand)->def_factors().getOnlyElement();
+ auto &operand_li_map = _lowered_graph.lower_info().operand;
+ auto in_def_factor = operand_li_map.getRawPtr(in_operand)->def_factors().getOnlyElement();
+ auto out_def_factor = operand_li_map.getRawPtr(out_operand)->def_factors().getOnlyElement();
auto in_config = in_def_factor.backend()->config();
auto out_config = out_def_factor.backend()->config();
@@ -53,59 +54,50 @@ void PermutationEliminationPass::visit(const ir::operation::Permute &node)
if (_graph.getOutputs().contains(out_operand))
{
+ // If the input is a const, we cannot remove it since we cannot put the constant data in the
+ // output buffer during prepare phase.
+ auto permute_input = node.getInputs().at(0);
+ if (_graph.operands().at(permute_input).isConstant())
+ return;
+    // If the input is a model input, we cannot remove it since our API lets users set different
+ // buffers for inputs and outputs even though one tensor is both at the same time.
+ auto permute_output = node.getOutputs().at(0);
+ if (_graph.getInputs().contains(permute_input) && _graph.getOutputs().contains(permute_output))
+ return;
+    // Likewise, if it copies from one model output to another model output, keep it.
+ if (_graph.getOutputs().contains(permute_input) && _graph.getOutputs().contains(permute_output))
+ return;
+
// Exceptional case : When the output operand is a model output
// In this case we keep the output and remove the input
auto &out_operand_obj = _graph.operands().at(out_operand);
assert(out_operand_obj.getDef() == _op_ind);
out_operand_obj.unsetDef();
- _lowered_graph.op_seqs().iterate([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- if (!op_seq.getOutputs().contains(in_operand))
+ _graph.operations().iterate([&](const ir::OperationIndex &op_ind, ir::IOperation &op) {
+ if (!op.getOutputs().contains(in_operand))
return;
-
- // Update OpSequence/ir::Operation edges and ir::Operand edges
- op_seq.replaceOutputs(in_operand, out_operand);
- for (auto op : op_seq.operations())
- {
- auto &operation_obj = _graph.operations().at(op);
- if (operation_obj.getOutputs().contains(in_operand))
- {
- operation_obj.replaceOutputs(in_operand, out_operand);
- out_operand_obj.setDef(op);
- }
- }
+ // Update Operation and Operand edges
+ op.replaceOutputs(in_operand, out_operand);
+ out_operand_obj.setDef(op_ind);
});
- // Remove Permute operation, enclosing OpSequence and the operand
+ // Remove Permute operation and the operand
{
_graph.removeOperand(in_operand);
-
- auto op_seq_ind = _lowered_graph.op_seqs().getOperation(_op_ind);
- // Assumes enclosing OpSequence contatins just this Permute operation
- assert(_lowered_graph.op_seqs().at(op_seq_ind).size() == 1);
- _lowered_graph.op_seqs().remove(op_seq_ind);
_graph.operations().remove(_op_ind);
}
- _lowered_graph.op_seqs().iterate([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- if (!op_seq.getInputs().contains(in_operand))
+ _graph.operations().iterate([&](const ir::OperationIndex &op_ind, ir::IOperation &op) {
+ if (!op.getInputs().contains(in_operand))
return;
-
- op_seq.replaceInputs(in_operand, out_operand);
- for (auto op : op_seq.operations())
- {
- auto &operation_obj = _graph.operations().at(op);
- if (operation_obj.getInputs().contains(in_operand))
- {
- operation_obj.replaceInputs(in_operand, out_operand);
- out_operand_obj.insertUse(op);
- }
- }
+ op.replaceInputs(in_operand, out_operand);
+ out_operand_obj.insertUse(op_ind);
});
VERBOSE(removePermute) << "Permute Op removed, node index : " << _op_ind << std::endl;
- VERBOSE(removePermute) << " - Input (removed) ir::Operand : " << in_operand << std::endl;
- VERBOSE(removePermute) << " - Output(kept) ir::Operand : " << out_operand << std::endl;
+ VERBOSE(removePermute) << " - Input (removed) Operand : " << in_operand << std::endl;
+ VERBOSE(removePermute) << " - Output(kept) Operand : " << out_operand << std::endl;
}
else
{
@@ -114,37 +106,23 @@ void PermutationEliminationPass::visit(const ir::operation::Permute &node)
auto &in_operand_obj = _graph.operands().at(in_operand);
in_operand_obj.removeUse(_op_ind);
- // Make OpSequences(that use the output) use the input
- _lowered_graph.op_seqs().iterate([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
- if (!op_seq.getInputs().contains(out_operand))
+ // Make operations(that use the output) use the input
+ _graph.operations().iterate([&](const ir::OperationIndex &op_ind, ir::IOperation &op) {
+ if (!op.getInputs().contains(out_operand))
return;
-
- op_seq.replaceInputs(out_operand, in_operand);
- for (auto op : op_seq.operations())
- {
- auto &operation_obj = _graph.operations().at(op);
- if (operation_obj.getInputs().contains(out_operand))
- {
- operation_obj.replaceInputs(out_operand, in_operand);
- in_operand_obj.insertUse(op);
- }
- }
+ op.replaceInputs(out_operand, in_operand);
+ in_operand_obj.insertUse(op_ind);
});
- // Remove Permute operation, enclosing OpSequence and the operand
+ // Remove the Permute operation and out_operand
{
_graph.removeOperand(out_operand);
-
- auto op_seq_ind = _lowered_graph.op_seqs().getOperation(_op_ind);
- // Assumes enclosing OpSequence contatins just this Permute operation
- assert(_lowered_graph.op_seqs().at(op_seq_ind).size() == 1);
- _lowered_graph.op_seqs().remove(op_seq_ind);
_graph.operations().remove(_op_ind);
}
- VERBOSE(removePermute) << "Permute Op removed, node index : " << _op_ind << std::endl;
- VERBOSE(removePermute) << " - Input (kept) ir::Operand : " << in_operand << std::endl;
- VERBOSE(removePermute) << " - Output(removed) ir::Operand : " << out_operand << std::endl;
+ VERBOSE(removePermute) << "Permute Op removed : " << _op_ind << std::endl;
+ VERBOSE(removePermute) << " - Input (kept) Operand : " << in_operand << std::endl;
+ VERBOSE(removePermute) << " - Output(removed) Operand : " << out_operand << std::endl;
}
}
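In the model-output branch above, the Permute is now kept in three situations before any rewiring happens. A condensed predicate capturing those checks (illustrative only; the function name is not part of the patch):

```
// Sketch: when a Permute whose output is a model output must NOT be eliminated.
bool must_keep_output_permute(onert::ir::Graph &graph,
                              onert::ir::OperandIndex permute_input,
                              onert::ir::OperandIndex permute_output)
{
  // 1. Constant input: constant data cannot be written into the user-provided
  //    output buffer at prepare time, so the copy has to stay.
  if (graph.operands().at(permute_input).isConstant())
    return true;
  // 2. Model input feeding a model output: the API allows distinct user buffers
  //    for the two roles, so a runtime copy is unavoidable.
  if (graph.getInputs().contains(permute_input) && graph.getOutputs().contains(permute_output))
    return true;
  // 3. Output-to-output copy: same reasoning as case 2.
  if (graph.getOutputs().contains(permute_input) && graph.getOutputs().contains(permute_output))
    return true;
  return false;
}
```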
diff --git a/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.h b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.h
index 29daf1a82..18ba99804 100644
--- a/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.h
+++ b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.h
@@ -35,7 +35,7 @@ namespace pass
* are compatible and layouts match.
*
* Permute input tensor is kept and the output is removed for all the cases, except model outputs.
- * As all output tensors have to be controlflow backend, so the output is kept.
+ * As all output tensors have to be on the builtin backend, the output is kept.
*
* @note This is an optimization pass which means that everything should work fine even if this pass
* was skipped.
@@ -49,7 +49,7 @@ public:
std::string id() final { return "PermutationEliminationPass"; }
public:
- void callback(const ir::OperationIndex &i, ir::Operation &n) final;
+ void callback(const ir::OperationIndex &i, ir::IOperation &n) final;
private:
void visit(const ir::operation::Permute &) final;
diff --git a/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc
index c83a72ada..11c22778e 100644
--- a/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc
+++ b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc
@@ -9,6 +9,7 @@
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
@@ -16,18 +17,16 @@
#include "PermutationInsertionPass.h"
-#include <cassert>
-#include <utility>
-#include <unordered_map>
+#include "../../backend/builtin/Config.h"
-#include "backend/controlflow/Config.h"
-#include "ir/Operand.h"
-#include "ir/operation/LowerInfo.h"
-#include "ir/Graph.h"
-#include "backend/IConfig.h"
+#include "compiler/OperationLowerInfo.h"
+#include "ir/operation/Permute.h"
#include "util/logging.h"
+
+#include <cassert>
#include <memory>
-#include "ir/operation/Permute.h"
+#include <unordered_map>
+#include <utility>
namespace onert
{
@@ -38,7 +37,8 @@ namespace pass
void PermutationInsertionPass::callback(const ir::OperandIndex &index, ir::Operand &object)
{
- auto &&operand_li = _lowered_graph.getLowerInfo(index);
+ auto &operand_li_map = _lowered_graph.lower_info().operand;
+ auto &&operand_li = operand_li_map.getRawPtr(index);
assert(operand_li);
// NOTE Later, constants also will have Def
@@ -51,16 +51,16 @@ void PermutationInsertionPass::callback(const ir::OperandIndex &index, ir::Opera
std::list<ir::OperationIndex> permute_indexes;
// Build a map for all necessary type of operands
- std::unordered_map<ir::operand::PermuteFactor, ir::OperandIndex> factor_to_index;
+ std::unordered_map<PermuteFactor, ir::OperandIndex> factor_to_index;
{
assert(operand_li->def_factors().size() == 1);
- for (auto factor : operand_li->def_factors())
+ for (auto &&factor : operand_li->def_factors())
{
factor_to_index.emplace(factor, index);
}
auto insert_set = operand_li->use_factors() - operand_li->def_factors();
- for (auto factor : insert_set)
+ for (auto &&factor : insert_set)
{
const auto permute_operation_index = insertPermute(index, factor);
permute_indexes.push_back(permute_operation_index);
@@ -75,33 +75,22 @@ void PermutationInsertionPass::callback(const ir::OperandIndex &index, ir::Opera
std::list<ir::OperationIndex> remove_list;
auto uses = object.getUses();
- for (auto use : uses)
+ for (auto &&use : uses)
{
// If permute operation, ignore it
if (std::find(permute_indexes.begin(), permute_indexes.end(), use) != permute_indexes.end())
continue;
auto &operation = _graph.operations().at(use);
- assert(_lowered_graph.op_seqs().containsOperation(use));
- auto op_seq_index = _lowered_graph.op_seqs().getOperation(use);
- auto op_seq_li = _lowered_graph.getLowerInfo(op_seq_index);
- assert(op_seq_li);
- const auto op_seq_layout = op_seq_li->layout();
- const backend::Backend *backend = op_seq_li->backend();
+ auto op_li = _lowered_graph.lower_info().operation.getRawPtr(use);
+ assert(op_li);
+ const auto op_layout = op_li->layout();
+ const backend::Backend *backend = op_li->backend();
assert(backend);
- auto use_node_inputs = operation.getInputs();
- assert(use_node_inputs.contains(index));
- auto new_index = factor_to_index.at({backend, op_seq_layout});
+ auto new_index = factor_to_index.at({backend, op_layout});
if (index != new_index)
{
- // Update from op_seq
- // Replace the same inputs of an OpSequence at once for the following reasons:
- // 1. An OpSequence's inputs are the same inputs of first operation
- // 2. An OpSequence may have inputs as the same operand (2 or more).
- // 3. The same inputs of OpSequence have the same PermuteFactor.
- _lowered_graph.op_seqs().at(op_seq_index).replaceInputs(index, new_index);
-
// Update from operation
// Replace the same inputs of an operation at once for the following reasons:
// No. 2 and 3 above
@@ -109,63 +98,69 @@ void PermutationInsertionPass::callback(const ir::OperandIndex &index, ir::Opera
// Update from operand
remove_list.push_back(
- use); // Removal should be done in another loop since we are in the loop
+ use); // Removal should be done in another loop since we are in the loop
_graph.operands().at(new_index).insertUse(use);
}
}
- for (auto &operation : remove_list)
+ for (const auto &operation_index : remove_list)
{
- object.removeUse(operation);
+ object.removeUse(operation_index);
}
}
}
ir::OperationIndex PermutationInsertionPass::insertPermute(const ir::OperandIndex &operand_index,
- const ir::operand::PermuteFactor &factor)
+ const PermuteFactor &factor)
{
- assert(!_graph.isBuildingPhase());
-
auto &operand = _graph.operands().at(operand_index);
// Generate output operand and permute operation
auto out_operand_index = _graph.addOperand(operand.shape(), operand.typeInfo());
- // change model output if operand_index is model output index
+  // Change the model output if operand_index is a model output index and the new operand is on
+  // the builtin backend
auto &model_outputs = _graph.getOutputs();
- if (model_outputs.contains(operand_index))
+ const backend::Backend *builtin_backend = compiler::BackendManager::get().getBuiltin();
+ assert(builtin_backend->config()->id() == onert::backend::builtin::Config::ID);
+
+ if (model_outputs.contains(operand_index) && factor.backend() == builtin_backend)
{
model_outputs.replace(operand_index, out_operand_index);
}
+ auto &operand_li_map = _lowered_graph.lower_info().operand;
+
// Find Permute information
- auto input_factor = _lowered_graph.getLowerInfo(operand_index)->def_factors().getOnlyElement();
+ auto input_factor = operand_li_map.getRawPtr(operand_index)->def_factors().getOnlyElement();
auto input_backend = input_factor.backend();
auto output_backend = factor.backend();
// NOTE Permute may not have specific layout because the layout of input and output may be
// different.
const auto permute_node_layout = ir::Layout::UNKNOWN;
// NOTE If one backend supports several layout, the backend must support Permute operation
- const backend::Backend *permute_node_backend = compiler::BackendManager::get().getControlflow();
+ const backend::Backend *permute_node_backend = compiler::BackendManager::get().getBuiltin();
+ assert(permute_node_backend->config()->id() == onert::backend::builtin::Config::ID);
+
if (input_backend == output_backend)
{
permute_node_backend = input_backend;
}
- const ir::operand::PermuteFactor permute_node_factor{permute_node_backend, permute_node_layout};
+ const PermuteFactor permute_node_factor{permute_node_backend, permute_node_layout};
// Update LowerInfo of input operand
- auto operand_lower_info = _lowered_graph.getLowerInfo(operand_index);
+ auto operand_lower_info = operand_li_map.getRawPtr(operand_index);
operand_lower_info->removeUsePermuteFactor(factor);
operand_lower_info->addUsePermuteFactor(permute_node_factor);
// Update LowerInfo of output operand
- auto out_operand_li = std::make_unique<ir::operand::LowerInfo>();
+ auto out_operand_li = std::make_unique<compiler::OperandLowerInfo>();
// The input and output factors of all nodes will be the same except Permute. So Tensor's
// allocators allocates memory using only the information of def permutation factor now.
// TODO Change param to permute_node_factor
out_operand_li->addDefPermuteFactor(factor);
out_operand_li->addUsePermuteFactor(factor);
- _lowered_graph.setLowerInfo(out_operand_index, std::move(out_operand_li));
+ operand_li_map.set(out_operand_index, std::move(out_operand_li));
// Insert permute operation to the graph
const auto input_layout = input_factor.layout();
@@ -188,20 +183,18 @@ ir::OperationIndex PermutationInsertionPass::insertPermute(const ir::OperandInde
auto insert_node = std::make_unique<Permute>(operand_index, out_operand_index, permute_type);
auto node_index = _graph.operations().push(std::move(insert_node));
- const auto &node = _graph.operations().at(node_index);
VERBOSE_F() << "Permute Op inserted, node index : " << node_index << std::endl;
- VERBOSE_F() << " - Input (original) Operand : " << operand_index << std::endl;
- VERBOSE_F() << " - Output(inserted) Operand : " << out_operand_index << std::endl;
+ VERBOSE_F() << " - Input (original) Operand : " << operand_index << "("
+ << input_factor.backend()->config()->id() << ")" << std::endl;
+ VERBOSE_F() << " - Output(inserted) Operand : " << out_operand_index << "("
+ << factor.backend()->config()->id() << ")" << std::endl;
- // OpSequence
+ // Operation LowerInfo
{
- auto op_seq_index = _lowered_graph.op_seqs().emplace(node_index, permute_node_layout);
- auto &op_seq = _lowered_graph.op_seqs().at(op_seq_index);
- op_seq.setInputs(node.getInputs());
- op_seq.setOutputs(node.getOutputs());
- _lowered_graph.setLowerInfo(op_seq_index, std::make_unique<ir::operation::LowerInfo>(
- permute_node_backend, permute_node_layout));
+ auto &operation_li_map = _lowered_graph.lower_info().operation;
+ operation_li_map.set(node_index, std::make_unique<compiler::OperationLowerInfo>(
+ permute_node_backend, permute_node_layout));
}
// Update Use/Def info
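One detail worth noting in `insertPermute()`: the inserted Permute node is assigned to the builtin backend unless the producer and consumer already share a backend, in which case that backend handles the permutation itself. A condensed sketch of that selection (the helper name is illustrative):

```
// Sketch: backend selection for an inserted Permute node.
const onert::backend::Backend *pick_permute_backend(const onert::backend::Backend *input_backend,
                                                    const onert::backend::Backend *output_backend)
{
  // A backend that supports several layouts permutes its own tensors; otherwise
  // the builtin backend's Permute kernel does the copy between backends.
  if (input_backend == output_backend)
    return input_backend;
  return onert::compiler::BackendManager::get().getBuiltin();
}
```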
diff --git a/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.h b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.h
index 758515385..ee0a1464c 100644
--- a/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.h
+++ b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.h
@@ -20,7 +20,7 @@
#include "LoweredOperandPass.h"
#include "compiler/BackendManager.h"
#include "ir/Operand.h"
-#include "ir/operand/PermuteFactor.h"
+#include "compiler/PermuteFactor.h"
namespace onert
{
@@ -48,7 +48,7 @@ private:
* @return ir::OperationIndex
*/
ir::OperationIndex insertPermute(const ir::OperandIndex &operand_index,
- const ir::operand::PermuteFactor &factor);
+ const PermuteFactor &factor);
};
} // namespace pass
diff --git a/runtime/onert/core/src/compiler/pass/PermutationOperationPass.cc b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.cc
index c5c95c726..f014d29d3 100644
--- a/runtime/onert/core/src/compiler/pass/PermutationOperationPass.cc
+++ b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.cc
@@ -30,10 +30,10 @@ namespace pass
using namespace ir;
-void PermutationOperationPass::callback(const OperationIndex &, Operation &node)
+void PermutationOperationPass::callback(const OperationIndex &, IOperation &node)
{
node.accept(*this);
-};
+}
// TODO Remove this. Expanding ranks of Operand is dangerous
void PermutationOperationPass::applyExpandRanks(const Operation &node)
@@ -43,9 +43,8 @@ void PermutationOperationPass::applyExpandRanks(const Operation &node)
assert(output.getDef().valid());
const auto node_index = output.getDef();
- const auto &op_seq_index = _lowered_graph.op_seqs().getOperation(node_index);
- const auto frontend_layout = _lowered_graph.op_seqs().at(op_seq_index).getLayout();
- const auto backend_layout = _lowered_graph.getLowerInfo(op_seq_index)->layout();
+ const auto frontend_layout = _graph.layout();
+ const auto backend_layout = _lowered_graph.lower_info().operation.getRawPtr(node_index)->layout();
if (frontend_layout == backend_layout)
{
@@ -84,10 +83,11 @@ void PermutationOperationPass::changeToKeepLayout(const Operation &node)
assert(output_obj.getDef().valid());
const auto node_index = output_obj.getDef();
- const auto &op_seq_index = _lowered_graph.op_seqs().getOperation(node_index);
- const auto frontend_layout = _lowered_graph.op_seqs().at(op_seq_index).getLayout();
- const auto backend_layout = _lowered_graph.getLowerInfo(op_seq_index)->layout();
+ auto &operation_li_map = _lowered_graph.lower_info().operation;
+ auto &operand_li_map = _lowered_graph.lower_info().operand;
+ const auto frontend_layout = _graph.layout();
+ const auto backend_layout = operation_li_map.getRawPtr(node_index)->layout();
if (frontend_layout == backend_layout)
{
@@ -97,96 +97,27 @@ void PermutationOperationPass::changeToKeepLayout(const Operation &node)
// Permutation changing layout beyond 4-D is not supported yet
assert(output_obj.shape().rank() <= 4);
- // Divide op_seq based on target operation
- {
- auto &prev_op_seq = _lowered_graph.op_seqs().at(op_seq_index);
- auto &operations = _lowered_graph.graph().operations();
-
- // Create new op_seq and move information from existing op_seq to new op_seq if target
- // node is the end of op_seq
- auto it = prev_op_seq.begin();
- // Find iterator of target node in op_seq
- while (*(it++) != node_index)
- ;
- if (it != prev_op_seq.end())
- {
- const auto &target_op_idx = *it;
- const auto &target_node = operations.at(target_op_idx);
- const auto &next_op_seq_index =
- _lowered_graph.op_seqs().emplace(target_op_idx, prev_op_seq.getLayout());
- auto &next_op_seq = _lowered_graph.op_seqs().at(next_op_seq_index);
- next_op_seq.setInputs(target_node.getInputs());
- next_op_seq.setOutputs(target_node.getOutputs());
-
- std::vector<OperationIndex> remove_list;
- remove_list.emplace_back(target_op_idx);
- while (++it != prev_op_seq.end())
- {
- next_op_seq.appendOperation(target_op_idx);
- next_op_seq.setOutputs(target_node.getOutputs());
- remove_list.emplace_back(target_op_idx);
- }
-
- prev_op_seq.setOutputs(node.getOutputs());
- for (const auto &index : remove_list)
- {
- prev_op_seq.remove(index);
- }
-
- const auto op_seq_li = _lowered_graph.getLowerInfo(op_seq_index);
- _lowered_graph.setLowerInfo(
- next_op_seq_index,
- std::make_unique<ir::operation::LowerInfo>(op_seq_li->backend(), op_seq_li->layout()));
- }
- }
-
- // Remove target operation from op_seq and insert the target operation to new op_seq
+ // Change PermuteFactors of operands and the operation of target node
{
- const auto backend = _lowered_graph.getLowerInfo(op_seq_index)->backend();
+ const auto op_li = operation_li_map.getRawPtr(node_index);
+ const auto backend = op_li->backend();
- // Remove target operation from op_sequence
- _lowered_graph.op_seqs().removeFromOpSequence(node_index);
+ operation_li_map.set(node_index,
+ std::make_unique<compiler::OperationLowerInfo>(backend, frontend_layout));
- if (!_lowered_graph.op_seqs().exist(op_seq_index))
- {
- // Remove lowerinfo for op_seq of target operation if the op_seq does not exist
- _lowered_graph.removeLowerInfo(op_seq_index);
- }
- else
- {
- // Update op_seq of target operation if the op_seq exists
- auto &prev_op_seq = _lowered_graph.op_seqs().at(op_seq_index);
- const auto &last_node_idx = *(--prev_op_seq.end());
- const auto &last_node = _lowered_graph.graph().operations().at(last_node_idx);
- prev_op_seq.setOutputs(last_node.getOutputs());
- }
-
- // Create new op_seq and set information to the op_seq
- auto new_op_seq_index = _lowered_graph.op_seqs().emplace(node_index, frontend_layout);
- auto &new_op_seq = _lowered_graph.op_seqs().at(new_op_seq_index);
- new_op_seq.setInputs(node.getInputs());
- new_op_seq.setOutputs(node.getOutputs());
- _lowered_graph.setLowerInfo(
- new_op_seq_index, std::make_unique<ir::operation::LowerInfo>(backend, frontend_layout));
- }
-
- // Change PermuteFactors of operands of target node
- {
- const auto &op_seq_index = _lowered_graph.op_seqs().getOperation(node_index);
- const auto op_seq_li = _lowered_graph.getLowerInfo(op_seq_index);
- const auto backend = op_seq_li->backend();
- const operand::PermuteFactor removed_factor{backend, backend_layout};
- const operand::PermuteFactor new_factor{backend, frontend_layout};
+ const PermuteFactor removed_factor{backend, backend_layout};
+ const PermuteFactor new_factor{backend, frontend_layout};
for (const auto &input : node.getInputs() | Remove::DUPLICATED | Remove::UNDEFINED)
{
+      // Check whether the old use factor can be removed: it cannot be removed if another
+      // operation that uses this operand still runs with the same backend and layout
bool canRemove = true;
for (const auto &use : _graph.operands().at(input).getUses())
{
if (use != node_index)
{
- const auto &use_op_seq_index = _lowered_graph.op_seqs().getOperation(use);
- auto use_op_seq_li = _lowered_graph.getLowerInfo(use_op_seq_index);
- if (use_op_seq_li->backend() == backend && use_op_seq_li->layout() == backend_layout)
+ auto use_op_li = operation_li_map.getRawPtr(use);
+ if (use_op_li->backend() == backend && use_op_li->layout() == backend_layout)
{
canRemove = false;
break;
@@ -194,27 +125,27 @@ void PermutationOperationPass::changeToKeepLayout(const Operation &node)
}
}
- auto lower_info = _lowered_graph.getLowerInfo(input);
+ auto input_li = operand_li_map.getRawPtr(input);
if (canRemove)
{
- lower_info->removeUsePermuteFactor(removed_factor);
+ input_li->removeUsePermuteFactor(removed_factor);
}
- lower_info->addUsePermuteFactor(new_factor);
+ input_li->addUsePermuteFactor(new_factor);
// Whether if node's input is an input of model or a constant
if (!_graph.operands().at(input).getDef().valid() &&
- (lower_info->def_factors().size() == 1 &&
- lower_info->def_factors().getOnlyElement() == removed_factor))
+ (input_li->def_factors().size() == 1 &&
+ input_li->def_factors().getOnlyElement() == removed_factor))
{
assert(_graph.getInputs().contains(input) || _graph.operands().at(input).isConstant());
- lower_info->removeDefPermuteFactor(removed_factor);
- lower_info->addDefPermuteFactor(new_factor);
+ input_li->removeDefPermuteFactor(removed_factor);
+ input_li->addDefPermuteFactor(new_factor);
}
}
- for (const auto &output : node.getOutputs() | Remove::DUPLICATED)
+ for (const auto &output : node.getOutputs() | Remove::DUPLICATED | Remove::UNDEFINED)
{
- auto lower_info = _lowered_graph.getLowerInfo(output);
+ auto lower_info = operand_li_map.getRawPtr(output);
lower_info->removeDefPermuteFactor(removed_factor);
lower_info->addDefPermuteFactor(new_factor);
@@ -279,6 +210,18 @@ void PermutationOperationPass::visit(const ir::operation::Gather &node)
}
}
+void PermutationOperationPass::visit(const ir::operation::OneHot &node)
+{
+ const auto &output_ind = node.getOutputs().at(0);
+ const auto &output_obj = _graph.operands().at(output_ind);
+ const auto &output_shape = output_obj.shape();
+
+ if (output_shape.rank() >= 4)
+ {
+ changeToKeepLayout(node);
+ }
+}
+
void PermutationOperationPass::visit(const ir::operation::Pack &node)
{
const auto &input_ind = node.getInputs().at(ir::operation::Reshape::Input::INPUT);
diff --git a/runtime/onert/core/src/compiler/pass/PermutationOperationPass.h b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.h
index 2dd76b971..e253a77ad 100644
--- a/runtime/onert/core/src/compiler/pass/PermutationOperationPass.h
+++ b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.h
@@ -36,7 +36,7 @@ public:
std::string id() final { return "PermutationOperationPass"; }
public:
- void callback(const ir::OperationIndex &i, ir::Operation &n) final;
+ void callback(const ir::OperationIndex &i, ir::IOperation &n) final;
public:
void visit(const ir::operation::BinaryArithmetic &) final;
@@ -44,6 +44,7 @@ public:
void visit(const ir::operation::Concat &) final;
void visit(const ir::operation::ElementwiseBinary &) final;
void visit(const ir::operation::ElementwiseUnary &) final;
+ void visit(const ir::operation::OneHot &) final;
void visit(const ir::operation::Pack &) final;
void visit(const ir::operation::PReLU &) final;
void visit(const ir::operation::SquaredDifference &) final;
diff --git a/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.cc b/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.cc
new file mode 100644
index 000000000..162c4e7ef
--- /dev/null
+++ b/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.cc
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Pass.h"
+
+#include "UnusedOperandEliminationPass.h"
+#include "ir/Index.h"
+#include "util/Set.h"
+#include "ir/Graph.h"
+
+/**
+ * @file UnusedOperandEliminationPass.cc
+ * @brief This file contains UnusedOperandEliminationPass class implementation
+ */
+
+namespace onert
+{
+namespace compiler
+{
+namespace pass
+{
+
+void UnusedOperandEliminationPass::run()
+{
+ util::Set<ir::OperandIndex> used;
+
+ _graph.operations().iterate([&](const ir::OperationIndex &, const ir::IOperation &node) {
+ for (auto &&ind : (node.getInputs() + node.getOutputs()) | ir::Remove::UNDEFINED)
+ {
+ used.add(ind);
+ }
+ });
+
+ // Graph's inputs/outputs are always considered as used
+ for (auto &&ind : (_graph.getInputs() + _graph.getOutputs()) | ir::Remove::UNDEFINED)
+ {
+ used.add(ind);
+ }
+
+ _graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
+ if (!used.contains(ind))
+ {
+ VERBOSE() << "Remove unused operand " << ind << std::endl;
+ _graph.operands().remove(ind);
+ }
+ });
+}
+
+} // namespace pass
+} // namespace compiler
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.h b/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.h
new file mode 100644
index 000000000..8078f4246
--- /dev/null
+++ b/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file UnusedOperandEliminationPass.h
+ * @brief This file contains UnusedOperandEliminationPass class
+ */
+
+#ifndef __ONERT_COMPILER_PASS_UNUSED_OPERAND_ELIMINATION_PASS_H__
+#define __ONERT_COMPILER_PASS_UNUSED_OPERAND_ELIMINATION_PASS_H__
+
+#include "Pass.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace pass
+{
+
+/**
+ * @brief A pass to eliminate unused operands from the graph
+ *
+ * Removes operands that are not used by any operation. Graph inputs/outputs are always kept.
+ *
+ */
+class UnusedOperandEliminationPass : public Pass
+{
+public:
+ using Pass::Pass;
+
+public:
+ std::string id() override { return "UnusedOperandEliminationPass"; }
+ void run() final;
+};
+
+} // namespace pass
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_PASS_UNUSED_OPERAND_ELIMINATION_PASS_H__
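For context, this pass is also meant to be driven through the compiler's PassRunner, as TrainingCompiler does later in this patch. A minimal sketch under that assumption; the helper name run_unused_operand_cleanup and the include paths are illustrative only:

#include <memory>

#include "ir/Graph.h"
#include "PassRunner.h"
#include "UnusedOperandEliminationPass.h"

namespace
{

// Append the pass to a PassRunner and execute it on a single subgraph, mirroring
// the optimization step in TrainingCompiler::compile() further down in this patch.
void run_unused_operand_cleanup(onert::ir::Graph &subg)
{
  onert::compiler::pass::PassRunner{}
    .append(std::make_unique<onert::compiler::pass::UnusedOperandEliminationPass>(subg))
    .run();
}

} // namespace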
diff --git a/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.test.cc b/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.test.cc
new file mode 100644
index 000000000..572b4df24
--- /dev/null
+++ b/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.test.cc
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "UnusedOperandEliminationPass.h"
+
+#include "ir/Graph.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert::ir;
+using namespace onert::compiler::pass;
+
+TEST(UnusedOperandEliminationPass, Simple)
+{
+ Graph graph;
+
+ // Add tensors
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ auto in = graph.addOperand(shape, type);
+ auto out = graph.addOperand(shape, type);
+
+ auto unused = graph.addOperand(shape, type);
+
+ // Set model inputs/outputs
+ graph.addInput(in);
+ graph.addOutput(out);
+
+ UnusedOperandEliminationPass{graph}.run();
+
+ ASSERT_TRUE(graph.operands().exist(in));
+ ASSERT_TRUE(graph.operands().exist(out));
+ ASSERT_FALSE(graph.operands().exist(unused));
+}
diff --git a/runtime/onert/core/src/compiler/train/LoweredTrainableGraph.cc b/runtime/onert/core/src/compiler/train/LoweredTrainableGraph.cc
new file mode 100644
index 000000000..490c648cd
--- /dev/null
+++ b/runtime/onert/core/src/compiler/train/LoweredTrainableGraph.cc
@@ -0,0 +1,285 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "compiler/train/LoweredTrainableGraph.h"
+
+#include "../ManualScheduler.h"
+#include "../pass/ConstantInsertionPass.h"
+#include "../pass/ConstantLoweringPass.h"
+#include "../pass/PassRunner.h"
+#include "../pass/PermutationEliminationPass.h"
+#include "../pass/PermutationInsertionPass.h"
+#include "../pass/PermutationOperationPass.h"
+#include "../../backend/builtin/Config.h"
+#include "../../dumper/text/GraphDumper.h"
+#include "../../ir/verifier/Verifier.h"
+#include "TrainableOperationConverter.h"
+
+#include "backend/Backend.h"
+#include "backend/train/ITrainableBackend.h"
+#include "compiler/BackendResolver.h"
+#include "util/logging.h"
+
+#include <cassert>
+#include <sstream>
+
+namespace onert
+{
+namespace compiler
+{
+namespace train
+{
+
+LoweredTrainableGraph::LoweredTrainableGraph(ir::train::TrainableGraph &graph,
+ const CompilerOptions &options)
+ : _trainable_graph{graph}
+{
+ lowerGraph(options);
+}
+
+void LoweredTrainableGraph::lowerGraph(const CompilerOptions &options)
+{
+ // Build backend contexts
+ auto &backend_manager = BackendManager::get();
+ // Create contexts for other backends
+ for (auto &&backend_str : options.backend_list)
+ {
+ backend_manager.loadBackend(backend_str);
+ auto backend = backend_manager.get(backend_str);
+
+    // TODO The default backend list contains "cpu", "acl_cl" and "acl_neon", but some of them
+    // are not available on x64 and some other platforms. Skipping an unloadable backend is a
+    // workaround for that; we should change this back (throw if a backend cannot be loaded) later.
+ if (!backend)
+ {
+ VERBOSE(LoweredTrainableGraph) << "Cannot load backend - " << backend_str << std::endl;
+ continue;
+ }
+ }
+ if (backend_manager.num_backends() == 0)
+ throw std::runtime_error{"No available backends loaded."};
+
+ // TODO Move "schedule" phase out of here
+ // TODO Scheduling
+ std::unique_ptr<BackendResolver> backend_resolver;
+ auto all_backends = backend_manager.getAll();
+
+ auto scheduler = ManualScheduler(all_backends, options);
+ backend_resolver = scheduler.schedule(_trainable_graph.graph());
+
+ // Check if backends are trainable
+ _trainable_graph.operations().iterate(
+ [&](const ir::OperationIndex &op_ind, const ir::IOperation &) {
+ const auto backend = backend_resolver->getBackend(op_ind);
+
+ // TODO Remove dynamic_cast
+ if (dynamic_cast<const backend::train::ITrainableBackend *>(backend) == nullptr)
+ {
+        throw std::runtime_error(backend->config()->id() + " backend does not support training");
+ }
+ });
+
+ makeLowerInfo(*backend_resolver);
+ VERBOSE(LoweredTrainableGraph) << "dump before mandatory passes" << std::endl;
+ dumper::text::dumpLoweredGraph(*this);
+
+ // Mandatory passes - kind of legalization(?)
+ compiler::pass::PassRunner{}
+ .append(std::make_unique<compiler::pass::ConstantInsertionPass>(*this))
+ .append(std::make_unique<compiler::pass::ConstantLoweringPass>(*this))
+ .append(std::make_unique<compiler::pass::PermutationOperationPass>(*this))
+ .append(std::make_unique<compiler::pass::PermutationInsertionPass>(*this))
+ .run();
+
+ // TODO Move converting Permute op into PermutationInsertionPass
+ auto op_converter = TrainableOperationConverter{_trainable_graph, nullptr};
+ _trainable_graph.operations().iterate(
+ [&](const onert::ir::OperationIndex &index, const onert::ir::IOperation &op) {
+ if (op.opcode() == ir::OpCode::Permute)
+ {
+ auto trainable_op = op_converter(op);
+ auto gen_index = _trainable_graph.replaceOperation(index, std::move(trainable_op));
+ UNUSED_RELEASE(gen_index);
+ assert(gen_index == index);
+ }
+ });
+
+ dumpLowerInfo();
+
+ // Optimization passes (optional)
+ compiler::pass::PassRunner{}
+ .append(std::make_unique<compiler::pass::PermutationEliminationPass>(*this))
+ .run();
+
+ // TODO Update LowerInfo for training
+
+ VERBOSE(LoweredTrainableGraph) << "Dump after all the passes" << std::endl;
+ for (auto &&operand : _trainable_graph.getInputs())
+ VERBOSE(LoweredTrainableGraph) << "Graph Input : " << operand << std::endl;
+ for (auto &&operand : _trainable_graph.getOutputs())
+ VERBOSE(LoweredTrainableGraph) << "Graph Output : " << operand << std::endl;
+ dumper::text::dumpLoweredGraph(*this);
+
+ // Graph verifications
+ {
+ assert(ir::verifier::InputOutputChecker().verify(_trainable_graph.graph()));
+ assert(ir::verifier::DAGChecker().verify(_trainable_graph.graph()));
+ assert(ir::verifier::EdgeChecker().verify(_trainable_graph.graph()));
+ }
+}
+
+void LoweredTrainableGraph::makeLowerInfo(const compiler::BackendResolver &backend_resolver)
+{
+ _trainable_graph.operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &) {
+ lower_info().operand.set(index, std::make_unique<OperandLowerInfo>());
+ });
+
+ // Set operand lower info using assigned backends to operations
+ _trainable_graph.operations().iterate(
+ [&](const ir::OperationIndex &op_ind, const ir::IOperation &op) {
+ auto backend = backend_resolver.getBackend(op_ind);
+ if (!backend)
+ {
+ throw std::runtime_error{"Fail to find backend for " + op.name() + " operation"};
+ }
+
+ auto frontend_layout = _trainable_graph.layout();
+
+ // The layout of each backend should be set at another place
+ // TODO Change setting layout of each backend at another place
+ auto backend_layout = backend->config()->supportLayout(op, frontend_layout);
+
+ for (auto &&ind : op.getInputs() | ir::Remove::UNDEFINED)
+ {
+ auto &operand_li = lower_info().operand.at(ind);
+ operand_li.addUsePermuteFactor(PermuteFactor{backend, backend_layout});
+ }
+ for (auto &&ind : op.getOutputs() | ir::Remove::UNDEFINED)
+ {
+ auto &operand_li = lower_info().operand.at(ind);
+ operand_li.addDefPermuteFactor(PermuteFactor{backend, backend_layout});
+ }
+ lower_info().operation.set(
+ op_ind, std::make_unique<compiler::OperationLowerInfo>(backend, backend_layout));
+ });
+
+ // Handle graph inputs and outputs
+ const auto builtin_backend = BackendManager::get().getBuiltin();
+ auto factor = PermuteFactor{builtin_backend, _trainable_graph.layout()};
+ for (auto &&index : _trainable_graph.getInputs() | ir::Remove::UNDEFINED)
+ {
+ auto &operand_li = lower_info().operand.at(index);
+ assert(operand_li.def_factors().empty());
+ operand_li.addDefPermuteFactor(factor);
+ }
+ for (auto &&index : _trainable_graph.getOutputs() | ir::Remove::UNDEFINED)
+ {
+ auto &operand_li = lower_info().operand.at(index);
+ operand_li.addUsePermuteFactor(factor);
+ }
+
+ // Handle variable tensors
+ _trainable_graph.operands().iterate([&](const ir::OperandIndex &index, ir::Operand &operand) {
+    // Some inputs of an operation could be non-constant, yet neither appear in the graph
+    // inputs/outputs nor be undefined operands - these are variable tensors. For example,
+    // UnidirectionalSequenceLSTM has such inputs.
+ if (operand.info().isVariable())
+ {
+ // The variable operand with buffer is not supported yet
+ assert(operand.data() == nullptr);
+ assert(operand.getUses().size() == 1 && !operand.getDef().valid());
+      auto &operand_li = lower_info().operand.at(index);
+ assert(operand_li.def_factors().empty());
+ operand_li.addDefPermuteFactor(operand_li.use_factors().getOnlyElement());
+ }
+ });
+}
+
+void LoweredTrainableGraph::dumpLowerInfo()
+{
+ if (::onert::util::logging::ctx.enabled() == false)
+ return;
+
+ std::map<uint32_t, std::string> dumps;
+
+ _trainable_graph.operands().iterate([&](const ir::OperandIndex &index, ir::Operand &object) {
+ const auto operand_lower_info = lower_info().operand.getRawPtr(index);
+ assert(operand_lower_info);
+ if (!operand_lower_info->def_factors().empty() || !operand_lower_info->use_factors().empty())
+ {
+ auto shape_to_string = [](const ir::Shape &shape) {
+ std::stringstream sstream;
+ sstream << "{ ";
+ for (auto i = 0; i < shape.rank(); ++i)
+ sstream << (shape.dim(i)) << " ";
+ sstream << "}";
+ return sstream.str();
+ };
+
+ auto factors_to_string = [](const PermuteFactorSet &factors) {
+ std::string str;
+ for (auto &&factor : factors)
+ {
+ str += factor.backend()->config()->id();
+ str += "(" + to_string(factor.layout()) + ")";
+ str += " ";
+ }
+ return "{ " + str + "}";
+ };
+
+ auto operation_index_set_to_string = [](const ir::OperationIndexSet &operations) {
+ std::stringstream sstream;
+ sstream << "{ ";
+ for (auto &&op : operations)
+ sstream << op << " ";
+ sstream << "}";
+ return sstream.str();
+ };
+
+ auto data_to_str = [](const ir::Data *data) {
+ return (data ? (std::to_string(data->size()) + " bytes") : "N/A");
+ };
+
+ std::string shape_str = shape_to_string(object.shape());
+ std::string def_op = operation_index_set_to_string({object.getDef()});
+ std::string use_ops = operation_index_set_to_string(object.getUses());
+ std::string def_factors = factors_to_string(operand_lower_info->def_factors());
+ std::string use_factors = factors_to_string(operand_lower_info->use_factors());
+ std::stringstream sstream;
+ sstream << "Operand " << index << " Info" << std::endl;
+ sstream << " - Shape : " << shape_str << std::endl;
+ sstream << " - Def/Uses : Def " << def_op << " Uses " << use_ops << std::endl;
+ sstream << " - Data : " << data_to_str(object.data()) << std::endl;
+ sstream << " - LowerInfo : Def " << def_factors << " Uses " << use_factors << std::endl;
+ dumps.emplace(index.value(), sstream.str());
+ }
+ });
+
+ for (const auto &e : dumps)
+ {
+ if (!e.second.empty())
+ {
+ std::istringstream iss(e.second);
+ std::string line;
+ while (std::getline(iss, line))
+ VERBOSE(Lower) << line << std::endl;
+ }
+ }
+}
+
+} // namespace train
+} // namespace compiler
+} // namespace onert
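A minimal construction sketch, assuming the caller has already prepared a TrainableGraph and CompilerOptions (as TrainingCompiler does below); the helper name lower() is illustrative:

#include <memory>

#include <compiler/train/LoweredTrainableGraph.h>

// The constructor runs the full lowerGraph() sequence shown above: backend loading,
// scheduling, lower-info creation, mandatory passes, and graph verification.
std::unique_ptr<onert::compiler::train::LoweredTrainableGraph>
lower(onert::ir::train::TrainableGraph &tgraph, const onert::compiler::CompilerOptions &options)
{
  return std::make_unique<onert::compiler::train::LoweredTrainableGraph>(tgraph, options);
}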
diff --git a/runtime/onert/core/src/compiler/train/StaticDerivativeShapeInferer.cc b/runtime/onert/core/src/compiler/train/StaticDerivativeShapeInferer.cc
new file mode 100644
index 000000000..d2153296f
--- /dev/null
+++ b/runtime/onert/core/src/compiler/train/StaticDerivativeShapeInferer.cc
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "StaticDerivativeShapeInferer.h"
+#include "util/ShapeInference.h"
+#include "util/logging.h"
+
+#include <misc/polymorphic_downcast.h>
+
+#include <sstream>
+#include <stdexcept>
+
+namespace onert
+{
+namespace compiler
+{
+namespace train
+{
+
+void StaticDerivativeShapeInferer::infer()
+{
+  // NOTE It is not yet decided whether iterating in reverse topological order is required.
+ auto sorted_ops = _lowered_subg->graph().topolSortOperations();
+ for (auto it = sorted_ops.rbegin(); it != sorted_ops.rend(); ++it)
+ {
+ const auto op_idx = *it;
+ const auto &op = _lowered_subg->trainable_graph().operation(op_idx);
+ if (checkDynamicInput(op))
+ {
+ std::stringstream msg;
+ msg << "StaticDerivativeShapeInferer does not support dynamic shape yet, ";
+ msg << op.name() << "(op index: " << op_idx << ") has dynamic shape.";
+ throw std::runtime_error(msg.str());
+ }
+
+ checkOutput(op);
+
+ op.accept(*this);
+ }
+}
+
+void StaticDerivativeShapeInferer::dump()
+{
+ // TODO dump
+}
+
+bool StaticDerivativeShapeInferer::checkDynamicInput(const ir::IOperation &op)
+{
+ const auto &operands = _lowered_subg->graph().operands();
+ for (auto input_idx : op.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED)
+ {
+ if (operands.at(input_idx).info().isDynamic())
+ {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void StaticDerivativeShapeInferer::checkOutput(const ir::IOperation &op)
+{
+ const auto &derivatives = _lowered_subg->trainable_graph().derivatives();
+ for (auto output_idx : op.getOutputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED)
+ {
+ if (!derivatives.exist(output_idx))
+ {
+ std::stringstream msg;
+ msg << "StaticDerivativeShapeInferer : Invalid output, ";
+ msg << op.name() << "'s derivative output(index: " << output_idx << ") does not exist.";
+ throw std::runtime_error(msg.str());
+ }
+ }
+}
+
+void StaticDerivativeShapeInferer::setShape(const ir::OperandIndex &index, const ir::Shape &shape)
+{
+ auto &tgraph = _lowered_subg->trainable_graph();
+
+ if (tgraph.derivatives().exist(index))
+ tgraph.changeDerivativeShape(index, shape);
+ else
+ {
+    // NOTE This code assumes the derivative type is always the same as the operand type,
+    //      but this has not been verified.
+ const auto &type = tgraph.operands().at(index).typeInfo();
+ const auto new_index = tgraph.addDerivative(index, std::make_unique<ir::Operand>(shape, type));
+ assert(new_index == index);
+ UNUSED_RELEASE(new_index);
+ }
+}
+
+void StaticDerivativeShapeInferer::visit(const ir::train::operation::Conv2D &)
+{
+ // NYI
+}
+
+void StaticDerivativeShapeInferer::visit(const ir::train::operation::ElementwiseActivation &)
+{
+ // NYI
+}
+
+void StaticDerivativeShapeInferer::visit(const ir::train::operation::Loss &)
+{
+ // NYI
+}
+
+void StaticDerivativeShapeInferer::visit(const ir::train::operation::Permute &op)
+{
+ const auto &derivatives = _lowered_subg->trainable_graph().derivatives();
+
+ const auto &output_idx = op.getOutputs().at(0);
+ const auto &output = derivatives.at(output_idx);
+
+  // Re-size the input derivative shape to match the output derivative shape
+ const auto &input_idx = op.getInputs().at(0);
+ const auto &new_shape = output.info().shape();
+ setShape(input_idx, new_shape);
+}
+
+void StaticDerivativeShapeInferer::visit(const ir::train::operation::Pool2D &)
+{
+ // NYI
+}
+
+void StaticDerivativeShapeInferer::visit(const ir::train::operation::Reshape &)
+{
+ // NYI
+}
+
+void StaticDerivativeShapeInferer::visit(const ir::train::operation::Softmax &)
+{
+ // NYI
+}
+
+} // namespace train
+} // namespace compiler
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/train/StaticDerivativeShapeInferer.h b/runtime/onert/core/src/compiler/train/StaticDerivativeShapeInferer.h
new file mode 100644
index 000000000..48b3172d2
--- /dev/null
+++ b/runtime/onert/core/src/compiler/train/StaticDerivativeShapeInferer.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_STATIC_DERIVATIVE_SHAPE_INFERER_H__
+#define __ONERT_COMPILER_STATIC_DERIVATIVE_SHAPE_INFERER_H__
+
+#include "ir/train/TrainableOperationVisitor.h"
+
+#include "compiler/train/LoweredTrainableGraph.h"
+#include "ir/Index.h"
+
+#include <memory>
+#include <unordered_map>
+
+namespace onert
+{
+namespace compiler
+{
+namespace train
+{
+
+/**
+ * @brief Class to infer derivative shapes before running kernels. It does the following:
+ * - re-calculates and sets derivative shapes at compile time (before running kernels)
+ * - if a shape cannot be calculated at compile time, marks the output as dynamic, meaning
+ *   its shape will be calculated while running kernels
+ */
+class StaticDerivativeShapeInferer : public ir::train::TrainableOperationVisitor
+{
+public:
+ StaticDerivativeShapeInferer(compiler::train::LoweredTrainableGraph *lowered_subg)
+ : _lowered_subg{lowered_subg}
+ {
+ }
+
+ /**
+   * @brief Infer the shapes of derivative operands and set the output shapes.
+   *        If an output shape cannot be known without running the op, mark it so that it can
+   *        be allocated when the kernel runs.
+ */
+ void infer(void);
+
+ void dump();
+
+private:
+ bool checkDynamicInput(const ir::IOperation &op);
+ void checkOutput(const ir::IOperation &op);
+ void setShape(const ir::OperandIndex &index, const ir::Shape &shape);
+
+private:
+ void visit(const ir::train::operation::Conv2D &op) override;
+ void visit(const ir::train::operation::ElementwiseActivation &op) override;
+ void visit(const ir::train::operation::Loss &op) override;
+ void visit(const ir::train::operation::Permute &op) override;
+ void visit(const ir::train::operation::Pool2D &op) override;
+ void visit(const ir::train::operation::Reshape &op) override;
+ void visit(const ir::train::operation::Softmax &op) override;
+
+private:
+ compiler::train::LoweredTrainableGraph *_lowered_subg;
+};
+
+} // namespace train
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_STATIC_DERIVATIVE_SHAPE_INFERER_H__
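A minimal driving sketch per lowered subgraph, mirroring how TrainingCompiler::compile() uses the inferer later in this patch; the helper name is illustrative:

#include <memory>

// infer() throws if any operation input has a dynamic shape; dump() is currently a
// placeholder (see the TODO in the .cc file above).
void infer_derivative_shapes(onert::compiler::train::LoweredTrainableGraph *lowered_subg)
{
  auto inferer =
    std::make_unique<onert::compiler::train::StaticDerivativeShapeInferer>(lowered_subg);
  inferer->infer();
  inferer->dump();
}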
diff --git a/runtime/onert/core/src/compiler/train/TensorRegistries.h b/runtime/onert/core/src/compiler/train/TensorRegistries.h
new file mode 100644
index 000000000..48eaf10a1
--- /dev/null
+++ b/runtime/onert/core/src/compiler/train/TensorRegistries.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_TRAIN_TENSOR_REGISTRIES_H__
+#define __ONERT_COMPILER_TRAIN_TENSOR_REGISTRIES_H__
+
+#include "../../backend/builtin/Config.h"
+#include "../../backend/builtin/train/TensorRegistry.h"
+
+#include <backend/train/TrainableBackendContext.h>
+
+#include <memory>
+#include <unordered_set>
+
+namespace onert
+{
+namespace compiler
+{
+namespace train
+{
+
+class TensorRegistries
+{
+public:
+ TensorRegistries() = default;
+
+ TensorRegistries(const backend::train::TrainableBackendContexts &backend_contexts,
+ bool include_builtin)
+ {
+ for (const auto &e : backend_contexts)
+ {
+ auto tensor_reg = e.second->tensor_registry();
+ if (e.first->config()->id() == backend::builtin::Config::ID)
+ {
+ _builtin_tensor_reg =
+ std::dynamic_pointer_cast<backend::builtin::train::TensorRegistry>(tensor_reg);
+ if (include_builtin)
+ _tensor_regs.insert(tensor_reg);
+ }
+ else
+ {
+ _tensor_regs.insert(tensor_reg);
+ }
+ }
+ }
+
+ std::unordered_set<std::shared_ptr<backend::train::ITensorRegistry>>::const_iterator begin() const
+ {
+ return _tensor_regs.cbegin();
+ }
+ std::unordered_set<std::shared_ptr<backend::train::ITensorRegistry>>::const_iterator end() const
+ {
+ return _tensor_regs.cend();
+ }
+
+ std::shared_ptr<backend::builtin::train::TensorRegistry> getBuiltinTensorRegistry() const
+ {
+ return _builtin_tensor_reg;
+ }
+
+ backend::ITensor *getITensor(ir::OperandIndex index) const
+ {
+ for (auto &&tensor_reg : _tensor_regs)
+ {
+ auto tensor = tensor_reg->getITensor(index);
+ if (tensor)
+ return tensor;
+ }
+ return nullptr;
+ }
+
+ backend::ITensor *getDerivativeITensor(ir::OperandIndex index) const
+ {
+ for (auto &&tensor_reg : _tensor_regs)
+ {
+ auto tensor = tensor_reg->getDerivativeITensor(index);
+ if (tensor)
+ return tensor;
+ }
+ return nullptr;
+ }
+
+private:
+ std::unordered_set<std::shared_ptr<backend::train::ITensorRegistry>> _tensor_regs;
+ std::shared_ptr<backend::builtin::train::TensorRegistry> _builtin_tensor_reg;
+};
+
+} // namespace train
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_TRAIN_TENSOR_REGISTRIES_H__
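A minimal lookup sketch, assuming the trainable backend contexts have already been created (normally by the executor factory); the helper name find_tensor is illustrative:

// Resolve an operand index to its backing tensor by searching every backend's tensor
// registry; returns nullptr if no backend registered a tensor for that index.
onert::backend::ITensor *
find_tensor(const onert::backend::train::TrainableBackendContexts &backend_contexts,
            onert::ir::OperandIndex index)
{
  onert::compiler::train::TensorRegistries tensor_regs{backend_contexts,
                                                       /* include_builtin = */ true};
  return tensor_regs.getITensor(index);
}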
diff --git a/runtime/onert/core/src/compiler/train/TrainableOperationConverter.cc b/runtime/onert/core/src/compiler/train/TrainableOperationConverter.cc
new file mode 100644
index 000000000..d20ae9fd3
--- /dev/null
+++ b/runtime/onert/core/src/compiler/train/TrainableOperationConverter.cc
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TrainableOperationConverter.h"
+
+#include "ir/train/Operations.Include.h"
+#include "util/Utils.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace train
+{
+
+TrainableOperationConverter::TrainableOperationConverter(
+ ir::train::TrainableGraph &tgraph, const compiler::train::TrainingInfo *training_info)
+ : UntrainableOperationConverter{tgraph}, _training_info{training_info}
+{
+ // Avoid unused-private-field error
+ UNUSED_RELEASE(_training_info);
+}
+
+void TrainableOperationConverter::visit(const ir::operation::Conv2D &node)
+{
+ _return_op = std::make_unique<ir::train::operation::Conv2D>(node);
+}
+
+void TrainableOperationConverter::visit(const ir::operation::ElementwiseActivation &node)
+{
+ if (node.param().op_type == ir::operation::ElementwiseActivation::Type::RELU)
+ {
+ _return_op = std::make_unique<ir::train::operation::ElementwiseActivation>(node);
+ }
+ else
+ {
+ UntrainableOperationConverter::visit(node);
+ }
+}
+
+void TrainableOperationConverter::visit(const ir::operation::FullyConnected &node)
+{
+ _return_op = std::make_unique<ir::train::operation::FullyConnected>(node);
+}
+
+void TrainableOperationConverter::visit(const ir::operation::Loss &node)
+{
+ _return_op = std::make_unique<ir::train::operation::Loss>(node);
+}
+
+void TrainableOperationConverter::visit(const ir::operation::Permute &node)
+{
+ _return_op = std::make_unique<ir::train::operation::Permute>(node);
+}
+
+void TrainableOperationConverter::visit(const ir::operation::Pool2D &node)
+{
+ _return_op = std::make_unique<ir::train::operation::Pool2D>(node);
+}
+
+void TrainableOperationConverter::visit(const ir::operation::Reshape &node)
+{
+ _return_op = std::make_unique<ir::train::operation::Reshape>(node);
+}
+
+void TrainableOperationConverter::visit(const ir::operation::Softmax &node)
+{
+ _return_op = std::make_unique<ir::train::operation::Softmax>(node);
+}
+
+} // namespace train
+} // namespace compiler
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/train/TrainableOperationConverter.h b/runtime/onert/core/src/compiler/train/TrainableOperationConverter.h
new file mode 100644
index 000000000..5f6fc10c3
--- /dev/null
+++ b/runtime/onert/core/src/compiler/train/TrainableOperationConverter.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_TRAIN_TRAINABLE_OPERATION_CONVERTER_H__
+#define __ONERT_COMPILER_TRAIN_TRAINABLE_OPERATION_CONVERTER_H__
+
+#include "UntrainableOperationConverter.h"
+
+#include "compiler/train/TrainingInfo.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace train
+{
+
+class TrainableOperationConverter : public UntrainableOperationConverter
+{
+public:
+ TrainableOperationConverter(ir::train::TrainableGraph &trainable_graph,
+ const compiler::train::TrainingInfo *training_info);
+
+ using UntrainableOperationConverter::operator();
+
+private:
+ void visit(const ir::operation::Conv2D &) override;
+ void visit(const ir::operation::ElementwiseActivation &) override;
+ void visit(const ir::operation::FullyConnected &) override;
+ void visit(const ir::operation::Loss &node) override;
+ void visit(const ir::operation::Permute &node) override;
+ void visit(const ir::operation::Pool2D &node) override;
+ void visit(const ir::operation::Reshape &) override;
+ void visit(const ir::operation::Softmax &) override;
+
+private:
+ const compiler::train::TrainingInfo *_training_info;
+};
+
+} // namespace train
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_TRAIN_TRAINABLE_OPERATION_CONVERTER_H__
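A minimal conversion sketch, matching how TrainingCompiler and LoweredTrainableGraph use the converter elsewhere in this patch; the helper name to_trainable is illustrative:

#include <memory>

// Convert one inference-time operation into its trainable counterpart. Operations
// without a trainable implementation fall back to the UntrainableOperation wrapper
// generated by the base converter.
std::unique_ptr<onert::ir::train::ITrainableOperation>
to_trainable(onert::ir::train::TrainableGraph &tgraph,
             const onert::compiler::train::TrainingInfo &training_info,
             const onert::ir::IOperation &op)
{
  onert::compiler::train::TrainableOperationConverter converter{tgraph, &training_info};
  return converter(op);
}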
diff --git a/runtime/onert/core/src/compiler/train/TrainingCompiler.cc b/runtime/onert/core/src/compiler/train/TrainingCompiler.cc
new file mode 100644
index 000000000..711af1651
--- /dev/null
+++ b/runtime/onert/core/src/compiler/train/TrainingCompiler.cc
@@ -0,0 +1,299 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TrainingCompiler.h"
+
+#include "StaticDerivativeShapeInferer.h"
+#include "TrainableOperationConverter.h"
+#include "pass/LossInsertionPass.h"
+#include "../CompilerHelpers.h"
+#include "../ExecutorFactory.h"
+#include "../pass/ConstantOutputPass.h"
+#include "../pass/OddOutputPass.h"
+#include "../pass/PassRunner.h"
+#include "../pass/UnusedOperandEliminationPass.h"
+#include "../ShapeValidator.h"
+#include "../../dumper/dot/DotDumper.h"
+#include "../../exec/train/TrainableExecutors.h"
+#include "../../ir/OperationDumper.h"
+#include "../../ir/verifier/Verifier.h"
+
+#include <compiler/StaticShapeInferer.h>
+#include <compiler/train/LoweredTrainableGraph.h>
+#include <ir/train/TrainableGraph.h>
+#include <exec/train/optimizer/SGD.h>
+
+#include <misc/polymorphic_downcast.h>
+#include <misc/string_helpers.h>
+
+namespace onert
+{
+namespace compiler
+{
+namespace train
+{
+
+TrainingCompiler::TrainingCompiler(const std::shared_ptr<ir::NNPkg> &nnpkg,
+ std::vector<std::unique_ptr<CompilerOptions>> &copts,
+ const TrainingInfo &training_info)
+ : _model{nnpkg->primary_model()}, _options{copts[0].get()}, _training_info{training_info}
+{
+ if (nnpkg->model_count() > 1)
+ throw std::runtime_error("TrainingCompiler does not support multiple models yet");
+
+ if (nnpkg->primary_model()->subgraphs_count() > 1)
+ throw std::runtime_error("TrainingCompiler does not support multiple subgraphs yet");
+}
+
+std::shared_ptr<CompilerArtifact> TrainingCompiler::compile(void)
+{
+ /***************************************************
+ * Prepare compilation phase
+ ***************************************************/
+ if (!_options)
+ throw std::runtime_error{"Empty compile option"};
+
+ // Mode check
+ // TODO handle option for each model
+ if (_options->he_profiling_mode)
+ {
+ if (!_options->he_scheduler)
+ throw std::runtime_error("Heterogeneous scheduler must be enabled during profiling.");
+
+ if (_options->executor != "Dataflow")
+ throw std::runtime_error("Profiling mode works only with 'Dataflow' executor");
+ }
+
+ if (!_options->minmax_filepath.empty())
+ {
+ if (_options->executor != "Linear")
+ throw std::runtime_error("Recording minmax works only with Linear executor");
+ }
+
+ _options->forceInternalOptions();
+ _options->verboseOptions();
+
+ auto custom_kernel_builder = _model->getKernelBuilder();
+
+ _model->iterate([&](const ir::SubgraphIndex &, ir::IGraph &graph) {
+ auto &subg = nnfw::misc::polymorphic_downcast<ir::Graph &>(graph);
+ // Mandatory passes
+ compiler::pass::PassRunner{}
+ .append(std::make_unique<compiler::pass::ConstantOutputPass>(subg))
+ .append(std::make_unique<compiler::pass::OddOutputPass>(subg))
+ .run();
+
+ // Optimizations
+ compiler::pass::PassRunner{}
+ .append(std::make_unique<compiler::pass::UnusedOperandEliminationPass>(subg))
+ .run();
+ });
+
+ std::unordered_map<ir::SubgraphIndex, std::shared_ptr<ir::train::TrainableGraph>>
+ trainable_subgraphs;
+
+ if (_model->hasOnly<ir::Graph>())
+ {
+ // Create trainable subgraphs by copy and converting inference model
+ _model->iterate([&](const ir::SubgraphIndex &subg_index, const ir::IGraph &graph) {
+ const auto &subg = nnfw::misc::polymorphic_downcast<const ir::Graph &>(graph);
+ // Create TrainableGraph by copying Graph
+ auto trainable_subg = std::make_shared<ir::train::TrainableGraph>(subg);
+
+ // Convert operations to trainable operations
+ auto converter = TrainableOperationConverter{*trainable_subg, &_training_info};
+ subg.operations().iterate(
+ [&](const onert::ir::OperationIndex &op_index, const onert::ir::IOperation &op) {
+ auto trainable_op = converter(op);
+ auto gen_index = trainable_subg->replaceOperation(op_index, std::move(trainable_op));
+ UNUSED_RELEASE(gen_index);
+ assert(gen_index == op_index);
+ });
+
+ trainable_subgraphs[subg_index] = std::move(trainable_subg);
+ });
+ }
+ else
+ {
+ // TODO Support models that have TrainableGraphs
+ throw std::runtime_error("TrainingCompiler: Invalid model");
+ }
+
+  // Release the original model; it has been converted into trainable subgraphs above
+ _model.reset();
+
+ // Apply pass for trainable subgraphs
+ for (auto &&pair : trainable_subgraphs)
+ {
+ auto trainable_subg = pair.second;
+ auto subg_index = pair.first;
+
+ compiler::pass::PassRunner{}
+ .append(std::make_unique<train::pass::LossInsertionPass>(*trainable_subg, &_training_info,
+ subg_index))
+ .run();
+ }
+
+ // Change input shape according to batch_size
+ for (auto &&pair : trainable_subgraphs)
+ {
+ auto trainable_subg = pair.second;
+
+ for (const auto &ind : trainable_subg->getInputs())
+ {
+ auto &input = trainable_subg->operands().at(ind);
+ auto new_shape = input.info().shape();
+ // TODO Consider batch size index
+ if (new_shape.dim(0) != 1)
+ throw std::runtime_error("the first dim is not 1. It is not supported yet.");
+ new_shape.dim(0) = _training_info.batchSize();
+ input.info().shape(new_shape);
+ }
+ }
+
+ /***************************************************
+ * Backend independent analysis & optimization phase
+ ***************************************************/
+ // TODO Handle dump level for each model
+ auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_options->graph_dump_level);
+ onert::dumper::dot::DotDumper dot_dumper(dump_level);
+
+ // Tracing context
+ auto tracing_ctx = std::make_unique<util::TracingCtx>();
+
+ // Lower: Assign backend
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::train::LoweredTrainableGraph>>
+ lowered_subgs;
+ {
+ for (auto &&pair : trainable_subgraphs)
+ {
+ auto &subg_index = pair.first;
+ auto trainable_subg = pair.second;
+
+ // Lower: Assign backend
+ lowered_subgs[subg_index] =
+ std::make_unique<compiler::train::LoweredTrainableGraph>(*trainable_subg, *_options);
+ // Set tracing_ctx for copied graph
+ if (tracing_ctx != nullptr)
+ tracing_ctx->setSubgraphIndex(&(lowered_subgs[subg_index]->graph()), subg_index.value());
+ }
+ }
+
+ for (const auto &pair : lowered_subgs)
+ {
+ const auto &subg_index = pair.first;
+ const auto &lowered_subg = pair.second;
+ dot_dumper.dump(*lowered_subg, nnfw::misc::str("after_lower_subg-", subg_index.value()));
+ }
+
+ // Set derivatives as default tensor info
+ for (const auto &pair : lowered_subgs)
+ {
+ auto lowered_subg = pair.second.get();
+ auto &tgraph = lowered_subg->trainable_graph();
+ tgraph.operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &obj) {
+ if (!obj.isConstant())
+ {
+ auto deriv = std::make_unique<ir::Operand>(obj);
+ const auto gen_index = tgraph.addDerivative(index, std::move(deriv));
+ assert(gen_index == index);
+ UNUSED_RELEASE(gen_index);
+ }
+ });
+ }
+
+ // Shape inference.
+ {
+    // Run the StaticShapeInferer of the primary subgraph. All child StaticShapeInferers are
+    // called recursively
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers =
+ createStaticShapeInferers(lowered_subgs);
+
+ const auto primary_subg_idx = ir::SubgraphIndex{0};
+ inferers.at(primary_subg_idx)->infer();
+
+ for (const auto &pair_inferer : inferers)
+ {
+ const auto inferer = pair_inferer.second.get();
+ inferer->dump();
+ }
+
+ // NOTE StaticDerivativeShapeInferer is allocated for each subgraph,
+ // so it does not support models that have controlflow operations yet.
+ for (auto &&pair : lowered_subgs)
+ {
+ auto &lowered_subg = pair.second;
+ auto inferer = std::make_unique<StaticDerivativeShapeInferer>(lowered_subg.get());
+ inferer->infer();
+ inferer->dump();
+ }
+ }
+
+ // Shape validation
+ for (const auto &pair : lowered_subgs)
+ {
+ auto &lowered_subg = pair.second;
+ compiler::ShapeValidator{lowered_subg->graph()}();
+ }
+
+ // TODO Validate shapes of derivative tensors
+
+ // Create optimizer
+ // TODO Set properties of optimizer
+ std::shared_ptr<exec::train::optimizer::Optimizer> optimizer;
+ const auto &optim_info = _training_info.optimizerInfo();
+ if (optim_info.optim_code == exec::train::optimizer::OptimizerCode::SGD)
+ optimizer = std::make_shared<exec::train::optimizer::SGD>(optim_info.learning_rate);
+ else
+ throw std::runtime_error("Invalid optimizer type, " +
+ exec::train::optimizer::toString(optim_info.optim_code));
+
+ /*************************************************************
+ * Backend independent analysis & optimization phase finished
+ *************************************************************/
+ auto executors = std::make_shared<exec::train::TrainableExecutors>();
+ for (auto &&pair : lowered_subgs)
+ {
+ auto const model_index = ir::ModelIndex{0};
+ auto const subg_index = pair.first;
+ auto &lowered_subg = pair.second;
+ auto const indexed_ranks = lowered_subg->indexed_ranks();
+
+ ir::OperationDumper dumper("Executor generation of Subgraph " +
+ std::to_string(subg_index.value()));
+ lowered_subg->graph().operations().iterate(
+ [&](const ir::OperationIndex &, const ir::IOperation &op) { op.accept(dumper); });
+
+ ExecutorFactoryArgs args;
+ args.tracing_ctx = tracing_ctx.get();
+ args.options = _options;
+ args.model_index = model_index;
+ args.custom_kernel_builder = custom_kernel_builder;
+ auto executor = std::unique_ptr<exec::IExecutor>{
+ ExecutorFactory::get().create(std::move(lowered_subg), executors, args, optimizer)};
+ executor->setIndexedRanks(indexed_ranks);
+ executors->emplace(model_index, subg_index, std::move(executor));
+ }
+
+ /********************************
+ * Code generation phase finished
+ ********************************/
+ return std::make_shared<CompilerArtifact>(executors, std::move(tracing_ctx));
+}
+
+} // namespace train
+} // namespace compiler
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/train/TrainingCompiler.h b/runtime/onert/core/src/compiler/train/TrainingCompiler.h
new file mode 100644
index 000000000..b93437217
--- /dev/null
+++ b/runtime/onert/core/src/compiler/train/TrainingCompiler.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file TrainingCompiler.h
+ * @brief This file contains TrainingCompiler class to define and run compilation phase
+ */
+
+#ifndef __ONERT_COMPILER_TRAIN_TRAINING_COMPILER_H_
+#define __ONERT_COMPILER_TRAIN_TRAINING_COMPILER_H_
+
+#include "compiler/CompilerOptions.h"
+#include "compiler/ICompiler.h"
+#include "compiler/train/TrainingInfo.h"
+#include "ir/NNPkg.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace train
+{
+
+/**
+ * @brief Class to compile NN package
+ */
+class TrainingCompiler : public ICompiler
+{
+public:
+ /**
+   * @brief Construct a new TrainingCompiler object for a single model
+   * @param[in] nnpkg         NN package to compile
+   * @param[in] copts         Compiler options
+   * @param[in] training_info Training information
+ */
+ explicit TrainingCompiler(const std::shared_ptr<ir::NNPkg> &nnpkg,
+ std::vector<std::unique_ptr<CompilerOptions>> &copts,
+ const TrainingInfo &training_info);
+
+ /**
+   * @brief Deleted default constructor
+ */
+ TrainingCompiler(void) = delete;
+
+ /**
+ * @brief Destroy the TrainingCompiler object
+ */
+ ~TrainingCompiler() = default;
+
+public:
+ /**
+ * @brief Do compilation with the options
+ *
+ * @return std::shared_ptr<CompilerArtifact> Executors as a result of compilation
+ */
+ std::shared_ptr<CompilerArtifact> compile(void);
+
+private:
+ std::shared_ptr<ir::Model> _model;
+ CompilerOptions *_options;
+ const TrainingInfo _training_info;
+};
+
+} // namespace train
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_TRAIN_TRAINING_COMPILER_H_
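A minimal end-to-end sketch of the intended call sequence, assuming the NN package, compiler options, and training info are prepared by the caller (e.g. the API layer); the helper name is illustrative:

#include <memory>
#include <vector>

std::shared_ptr<onert::compiler::CompilerArtifact>
compile_for_training(const std::shared_ptr<onert::ir::NNPkg> &nnpkg,
                     std::vector<std::unique_ptr<onert::compiler::CompilerOptions>> &copts,
                     const onert::compiler::train::TrainingInfo &training_info)
{
  // Converts the model into trainable subgraphs, inserts the loss, lowers and
  // shape-infers the subgraphs, and returns TrainableExecutors in the artifact.
  onert::compiler::train::TrainingCompiler compiler{nnpkg, copts, training_info};
  return compiler.compile();
}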
diff --git a/runtime/onert/core/src/compiler/train/UntrainableOperationConverter.cc b/runtime/onert/core/src/compiler/train/UntrainableOperationConverter.cc
new file mode 100644
index 000000000..6a5a052b6
--- /dev/null
+++ b/runtime/onert/core/src/compiler/train/UntrainableOperationConverter.cc
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "UntrainableOperationConverter.h"
+
+#include "ir/train/operation/UntrainableOperation.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace train
+{
+
+UntrainableOperationConverter::UntrainableOperationConverter(ir::train::TrainableGraph &tgraph)
+ : _tgraph{tgraph}, _return_op{nullptr}
+{
+}
+
+std::unique_ptr<ir::train::ITrainableOperation> UntrainableOperationConverter::
+operator()(const ir::IOperation &op)
+{
+ op.accept(*this);
+
+ return std::move(_return_op);
+}
+
+#define OP(InternalName) \
+ void UntrainableOperationConverter::visit(const ir::operation::InternalName &node) \
+ { \
+ _return_op = \
+ std::make_unique<ir::train::operation::UntrainableOperation<ir::operation::InternalName>>( \
+ node); \
+ }
+#include "ir/Operations.lst"
+#undef OP
+
+} // namespace train
+} // namespace compiler
+} // namespace onert
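For readers unfamiliar with the OP macro above: including ir/Operations.lst while OP is defined is an X-macro, expanding the macro once per listed operation to generate one visit() overload each. A small self-contained illustration with made-up names (none of them come from onert):

#include <iostream>

// Stand-in for ir/Operations.lst: each entry becomes one generated member function.
#define HYPOTHETICAL_OP_LIST \
  OP(Conv2D)                 \
  OP(Softmax)

struct HypotheticalVisitor
{
#define OP(InternalName) \
  void visit_##InternalName() { std::cout << "visit " #InternalName << std::endl; }
  HYPOTHETICAL_OP_LIST
#undef OP
};

int main()
{
  HypotheticalVisitor v;
  v.visit_Conv2D();  // prints "visit Conv2D"
  v.visit_Softmax(); // prints "visit Softmax"
  return 0;
}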
diff --git a/runtime/onert/core/src/compiler/train/UntrainableOperationConverter.h b/runtime/onert/core/src/compiler/train/UntrainableOperationConverter.h
new file mode 100644
index 000000000..e960b3831
--- /dev/null
+++ b/runtime/onert/core/src/compiler/train/UntrainableOperationConverter.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_TRAIN_UNTRAINABLE_OPERATION_CONVERTER_H__
+#define __ONERT_COMPILER_TRAIN_UNTRAINABLE_OPERATION_CONVERTER_H__
+
+#include "ir/Operations.Include.h"
+#include "ir/OperationVisitor.h"
+#include "ir/train/TrainableGraph.h"
+
+#include <memory>
+
+namespace onert
+{
+namespace compiler
+{
+namespace train
+{
+
+class UntrainableOperationConverter : public ir::OperationVisitor
+{
+public:
+ UntrainableOperationConverter(ir::train::TrainableGraph &tgraph);
+ std::unique_ptr<ir::train::ITrainableOperation> operator()(const ir::IOperation &op);
+
+#define OP(InternalName) void visit(const ir::operation::InternalName &node);
+#include "ir/Operations.lst"
+#undef OP
+
+protected:
+ ir::train::TrainableGraph &_tgraph;
+ std::unique_ptr<ir::train::ITrainableOperation> _return_op;
+};
+
+} // namespace train
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_TRAIN_UNTRAINABLE_OPERATION_CONVERTER_H__
diff --git a/runtime/onert/core/src/compiler/train/pass/LossInsertionPass.cc b/runtime/onert/core/src/compiler/train/pass/LossInsertionPass.cc
new file mode 100644
index 000000000..3e01a9739
--- /dev/null
+++ b/runtime/onert/core/src/compiler/train/pass/LossInsertionPass.cc
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "LossInsertionPass.h"
+
+#include "ir/train/TrainableGraph.h"
+#include "ir/train/operation/Loss.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace train
+{
+namespace pass
+{
+
+void LossInsertionPass::run()
+{
+ const auto &loss_info = _training_info->lossInfo();
+
+ ir::operation::Loss::Param param;
+ param.op_type = loss_info.type;
+
+ if (_trainable_graph.getOutputs().size() != 1)
+ {
+ throw std::runtime_error("LossInsertionPass: Not supported multiple outputs");
+ }
+
+ // TODO Consider SparseCategoricalCrossentropy y_true shape
+ // SparseCategoricalCrossentropy loss has a different y_true shape than y_pred.
+
+  // TODO Implement a loop over [0, getOutputs().size())
+  // index: the loop index
+ const auto index = 0;
+ const auto &y_pred_index = _trainable_graph.getOutputs().at(index);
+ const auto &y_pred = _trainable_graph.operands().at(y_pred_index);
+ const auto &shape = y_pred.shape();
+ const auto &type_info = y_pred.typeInfo();
+ auto y_true_index = _trainable_graph.addOperand(shape, type_info);
+ ir::OperandIndexSequence inputs{y_pred_index, y_true_index};
+
+ // TODO Consider Reduction
+  // For some reduction types, the output has the same shape as y_true.
+
+ const ir::TypeInfo float_op(ir::DataType::FLOAT32);
+ auto output_index = _trainable_graph.addOperand(ir::Shape{1}, float_op);
+ ir::OperandIndexSequence outputs{output_index};
+
+ auto loss_op = std::make_unique<ir::operation::Loss>(inputs, outputs, param);
+ auto trainable_loss_op = std::make_unique<ir::train::operation::Loss>(*loss_op);
+
+ _trainable_graph.addOperation(std::move(trainable_loss_op));
+
+ _trainable_graph.addInput(y_true_index);
+
+ // TODO Add loss as many as output size
+ _trainable_graph.addLoss(output_index, ir::IOIndex{index});
+}
+
+} // namespace pass
+} // namespace train
+} // namespace compiler
+} // namespace onert
diff --git a/runtime/onert/core/src/compiler/train/pass/LossInsertionPass.h b/runtime/onert/core/src/compiler/train/pass/LossInsertionPass.h
new file mode 100644
index 000000000..ed4d60c96
--- /dev/null
+++ b/runtime/onert/core/src/compiler/train/pass/LossInsertionPass.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_TRAIN_PASS_LOSS_INSERTION_PASS_H__
+#define __ONERT_COMPILER_TRAIN_PASS_LOSS_INSERTION_PASS_H__
+
+#include "Pass.h"
+
+#include "compiler/train/TrainingInfo.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace train
+{
+namespace pass
+{
+
+class LossInsertionPass : public Pass
+{
+public:
+ LossInsertionPass(ir::train::TrainableGraph &trainable_graph, const TrainingInfo *training_info,
+ const ir::SubgraphIndex &subg_index)
+ : Pass{trainable_graph, training_info}, _subg_index{subg_index}
+ {
+ }
+
+public:
+ std::string id() final { return "LossInsertionPass"; }
+ void run() final;
+
+private:
+ ir::SubgraphIndex _subg_index;
+};
+
+} // namespace pass
+} // namespace train
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_TRAIN_PASS_LOSS_INSERTION_PASS_H__
diff --git a/runtime/onert/core/src/compiler/train/pass/Pass.h b/runtime/onert/core/src/compiler/train/pass/Pass.h
new file mode 100644
index 000000000..d64c06cf4
--- /dev/null
+++ b/runtime/onert/core/src/compiler/train/pass/Pass.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_TRAIN_PASS_PASS_H__
+#define __ONERT_COMPILER_TRAIN_PASS_PASS_H__
+
+#include "../../pass/IPass.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace train
+{
+class TrainableGraph;
+} // namespace train
+} // namespace ir
+} // namespace onert
+
+namespace onert
+{
+namespace compiler
+{
+namespace train
+{
+
+class TrainingInfo;
+
+namespace pass
+{
+
+class Pass : public compiler::pass::IPass
+{
+public:
+ Pass(ir::train::TrainableGraph &trainable_graph, const TrainingInfo *training_info)
+ : _trainable_graph{trainable_graph}, _training_info{training_info}
+ {
+ }
+ virtual ~Pass() = default;
+
+protected:
+ ir::train::TrainableGraph &_trainable_graph;
+ const TrainingInfo *_training_info;
+};
+
+} // namespace pass
+} // namespace train
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_TRAIN_PASS_PASS_H__
diff --git a/runtime/onert/core/src/dumper/dot/DotBuilder.cc b/runtime/onert/core/src/dumper/dot/DotBuilder.cc
index 38a69696e..9257434fa 100644
--- a/runtime/onert/core/src/dumper/dot/DotBuilder.cc
+++ b/runtime/onert/core/src/dumper/dot/DotBuilder.cc
@@ -29,31 +29,12 @@ DotBuilder::DotBuilder() {}
void DotBuilder::update(const Node &node_info)
{
add(node_info);
- for (auto edge : node_info.out_edges())
+ for (auto &&edge : node_info.out_edges())
{
addEdge(node_info, *edge);
}
}
-void DotBuilder::addOpSequence(const DotSubgraphInfo &subgraph_info)
-{
- _dot << "subgraph cluster_" << subgraph_info.index().value() << " {\n";
- _dot << " label=\"" << subgraph_info.label() << "\";\n";
- _dot << " style=filled;\n";
- _dot << " color=lightgrey;\n";
- _dot << " ";
- for (auto op : subgraph_info.operations())
- {
- _dot << "operation" << op.value() << "; ";
- }
- for (auto op : subgraph_info.operands())
- {
- _dot << "operand" << op.value() << "; ";
- }
- _dot << "\n";
- _dot << "}\n";
-}
-
void DotBuilder::writeDot(std::ostream &os)
{
os << "digraph D {\n"
@@ -66,7 +47,7 @@ void DotBuilder::add(const Node &node)
_dot << node.id();
std::stringstream ss;
_dot << "[";
- for (auto attr : node.attributes())
+ for (auto &&attr : node.attributes())
{
_dot << attr.first << "=\"" << attr.second << "\" ";
}
diff --git a/runtime/onert/core/src/dumper/dot/DotBuilder.h b/runtime/onert/core/src/dumper/dot/DotBuilder.h
index 681cbbf5d..30f32f8f9 100644
--- a/runtime/onert/core/src/dumper/dot/DotBuilder.h
+++ b/runtime/onert/core/src/dumper/dot/DotBuilder.h
@@ -25,7 +25,6 @@
#include "OperationNode.h"
#include "OperandNode.h"
-#include "DotSubgraphInfo.h"
using Operation = onert::ir::Operation;
using Object = onert::ir::Operand;
@@ -44,7 +43,6 @@ public:
public:
void update(const Node &dotinfo);
- void addOpSequence(const DotSubgraphInfo &subgraph_info);
void writeDot(std::ostream &os);
diff --git a/runtime/onert/core/src/dumper/dot/DotDumper.cc b/runtime/onert/core/src/dumper/dot/DotDumper.cc
index 118057f09..ab77a6c62 100644
--- a/runtime/onert/core/src/dumper/dot/DotDumper.cc
+++ b/runtime/onert/core/src/dumper/dot/DotDumper.cc
@@ -19,8 +19,7 @@
#include "DotDumper.h"
#include "DotBuilder.h"
-#include "DotSubgraphInfo.h"
-#include "ir/OpSequence.h"
+#include "ir/OperandIndexMap.h"
#include "ir/OperationIndexMap.h"
#include "backend/Backend.h"
#include "backend/IConfig.h"
@@ -33,151 +32,153 @@ namespace dumper
namespace dot
{
-void DotDumper::dump(const std::string &tag)
+namespace
{
- if (_level == Level::OFF)
- {
- return;
- }
-
- onert::dumper::dot::DotBuilder dot_builder;
-
- auto &operations = _graph.operations();
- auto &operands = _graph.operands();
-
- ir::OperationIndexMap<std::unique_ptr<Operation>> operation_nodes;
- std::unordered_map<ir::OperandIndex, std::unique_ptr<Operand>> operand_nodes;
-
- auto backend_to_fillcolor = [](const backend::Backend *backend) {
- static const auto map = []() {
- std::unordered_map<const backend::Backend *, std::string> ret;
- uint32_t index = 1; // Start from 1 to avoid 0(red) which is too dark :(
- for (const auto backend : compiler::BackendManager::get().getAll())
- {
- ret.emplace(backend, Node::BG_COLORS[index]);
- index = (index + 1) % (sizeof(Node::BG_COLORS) / sizeof(Node::BG_COLORS[0]));
- }
- return ret;
- }();
-
- auto itr = map.find(backend);
- if (itr == map.end())
- {
- return Node::DEFAULT_FILLCOLOR;
- }
- else
+std::string backend_to_fillcolor(const backend::Backend *backend)
+{
+ static const auto map = []() {
+ std::unordered_map<const backend::Backend *, std::string> ret;
+ uint32_t index = 1; // Start from 1 to avoid 0(red) which is too dark :(
+ for (const auto backend : compiler::BackendManager::get().getAll())
{
- return itr->second;
+ ret.emplace(backend, Node::BG_COLORS[index]);
+ index = (index + 1) % (sizeof(Node::BG_COLORS) / sizeof(Node::BG_COLORS[0]));
}
- };
+ return ret;
+ }();
+ auto itr = map.find(backend);
+ if (itr == map.end())
+ {
+ return Node::DEFAULT_FILLCOLOR;
+ }
+ else
+ {
+ return itr->second;
+ }
+}
- util::Set<ir::OperandIndex> shown_operand_set;
+std::unordered_map<ir::OperandIndex, std::unique_ptr<Operand>>
+generate_dot_operands(const ir::Graph &graph, const DotDumper::Level level)
+{
+ std::unordered_map<ir::OperandIndex, std::unique_ptr<Operand>> dot_operands;
+ const auto &operands = graph.operands();
operands.iterate([&](const ir::OperandIndex &index, const ir::Operand &object) {
- bool showing_cond = false;
- if (_level == Level::ALL)
- {
- showing_cond = true;
- }
- else
- {
- showing_cond = !object.isConstant();
- }
- if (object.isConstant() || _graph.getInputs().contains(index))
- {
- showing_cond = showing_cond && (object.getUses().size() > 0);
- }
+ bool showing_cond =
+ level == DotDumper::Level::ALL
+ ? true
+ : !object.isConstant() || (graph.getInputs() + graph.getOutputs()).contains(index);
if (showing_cond)
{
- shown_operand_set.add(index);
-
auto type = [&]() {
using onert::dumper::dot::Operand;
- if (_graph.getInputs().contains(index))
+ if (graph.getInputs().contains(index))
return Operand::Type::MODEL_INPUT;
- if (_graph.getOutputs().contains(index))
+ if (graph.getOutputs().contains(index))
return Operand::Type::MODEL_OUTPUT;
return Operand::Type::INTERNAL;
}();
auto node = std::make_unique<Operand>(index, type);
+ std::string label = std::to_string(index.value());
+ std::string fillcolor = "";
+ node->setAttribute("label", label);
+ node->setAttribute("fillcolor", fillcolor);
- {
- // Display LowerInfo attributes
- std::string label = std::to_string(index.value());
- std::string fillcolor = "";
- if (_lowered_graph)
- {
- auto lower_info = _lowered_graph->getLowerInfo(index);
- const auto &def_factors = lower_info->def_factors();
- if (def_factors.size() > 0)
- {
- label += "\\n[";
- label += def_factors.getOnlyElement().backend()->config()->id();
- label += "]";
-
- fillcolor = backend_to_fillcolor(lower_info->def_factors().getOnlyElement().backend());
- }
- }
- node->setAttribute("label", label);
- node->setAttribute("fillcolor", fillcolor);
- }
-
- operand_nodes.emplace(index, std::move(node));
+ dot_operands.emplace(index, std::move(node));
}
});
- operations.iterate([&](const ir::OperationIndex &index, const ir::Operation &op) {
+ return dot_operands;
+}
+
+ir::OperationIndexMap<std::unique_ptr<Operation>>
+generate_dot_operations(const ir::Graph &graph,
+ const ir::OperandIndexMap<std::unique_ptr<Operand>> &dot_operands)
+{
+ ir::OperationIndexMap<std::unique_ptr<Operation>> dot_operations;
+ const auto &operations = graph.operations();
+ operations.iterate([&](const ir::OperationIndex &index, const ir::IOperation &op) {
auto node = std::make_unique<Operation>(index, op);
- for (auto input : op.getInputs())
+ for (auto &&input : op.getInputs())
{
using onert::dumper::dot::Operand;
// Constant input and dump level is ALL_BUT_CONSTANTS
- if (operand_nodes.find(input) == operand_nodes.end())
+ if (dot_operands.find(input) == dot_operands.end())
continue;
- auto &input_node = operand_nodes.at(input);
+ auto &input_node = dot_operands.at(input);
input_node->addOutEdge(node.get());
}
- for (auto output : op.getOutputs())
+ for (auto &&output : op.getOutputs() | ir::Remove::UNDEFINED)
{
using onert::dumper::dot::Operand;
- auto &output_node = operand_nodes.at(output);
+ auto &output_node = dot_operands.at(output);
node->addOutEdge(output_node.get());
}
- operation_nodes.emplace(index, std::move(node));
+ dot_operations.emplace(index, std::move(node));
});
- if (_lowered_graph)
- {
- const auto &op_seqs = _lowered_graph->op_seqs();
- op_seqs.iterate([&](const ir::OpSequenceIndex &index, const ir::OpSequence &op_seq) {
- const auto lower_info = _lowered_graph->getLowerInfo(index);
+ return dot_operations;
+}
+
+void update_lower_info(const compiler::ILoweredGraph &lowered_graph,
+ ir::OperandIndexMap<std::unique_ptr<Operand>> *dot_operands)
+{
+ const auto &operands = lowered_graph.graph().operands();
+ operands.iterate([&](const ir::OperandIndex &index, const ir::Operand &) {
+ auto itr = dot_operands->find(index);
+ if (itr != dot_operands->end())
+ {
+ auto &node = itr->second;
+ // Display LowerInfo attributes
+ std::string label = node->getAttribute("label");
+ std::string fillcolor = node->getAttribute("fillcolor");
+ auto lower_info = lowered_graph.lower_info().operand.getRawPtr(index);
+ const auto &def_factors = lower_info->def_factors();
+ if (def_factors.size() > 0)
+ {
+ label += "\\n[";
+ label += def_factors.getOnlyElement().backend()->config()->id();
+ label += "]";
+ fillcolor = backend_to_fillcolor(lower_info->def_factors().getOnlyElement().backend());
+ }
+ node->setAttribute("label", label);
+ node->setAttribute("fillcolor", fillcolor);
+ }
+ });
+}
+
+void update_lower_info(const compiler::ILoweredGraph &lowered_graph,
+ ir::OperationIndexMap<std::unique_ptr<Operation>> *dot_operations)
+{
+ const auto &operations = lowered_graph.graph().operations();
+ operations.iterate([&](const ir::OperationIndex &index, const ir::IOperation &) {
+ const auto lower_info = lowered_graph.lower_info().operation.getRawPtr(index);
+ if (lower_info)
+ {
auto fillcolor = backend_to_fillcolor(lower_info->backend());
- std::string label =
- std::to_string(index.value()) + " [" + lower_info->backend()->config()->id() + "]";
- DotSubgraphInfo subgraph_info{index, op_seq, shown_operand_set, _graph.operations()};
- subgraph_info.label(label);
- subgraph_info.fillcolor(fillcolor);
- dot_builder.addOpSequence(subgraph_info);
-
- // Set fillcolor of all operations in the op_seq
- for (const auto &op_idx : op_seq.operations())
+ std::string backend_label = "[" + lower_info->backend()->config()->id() + "]";
+ auto itr = dot_operations->find(index);
+ if (itr != dot_operations->end())
{
- auto found = operation_nodes.find(op_idx);
- if (found != operation_nodes.end())
- {
- auto &&op = found->second;
- op->setAttribute("fillcolor", fillcolor);
- }
+ auto &node = itr->second;
+ node->setAttribute("label", node->getAttribute("label") + "\n" + backend_label);
+ node->setAttribute("fillcolor", fillcolor);
}
- });
- }
+ }
+ });
+}
+void dump_to_file(const ir::OperandIndexMap<std::unique_ptr<Operand>> &operand_nodes,
+ const ir::OperationIndexMap<std::unique_ptr<Operation>> &operation_nodes,
+ const std::string &tag)
+{
+ onert::dumper::dot::DotBuilder dot_builder;
for (const auto &e : operation_nodes)
dot_builder.update(*e.second);
for (const auto &e : operand_nodes)
@@ -198,6 +199,34 @@ void DotDumper::dump(const std::string &tag)
fb.close();
}
}
+} // namespace
+
+void DotDumper::dump(const ir::Graph &graph, const std::string &tag)
+{
+ if (_level == Level::OFF)
+ {
+ return;
+ }
+
+ const auto dot_operands = generate_dot_operands(graph, _level);
+ const auto dot_operations = generate_dot_operations(graph, dot_operands);
+ dump_to_file(dot_operands, dot_operations, tag);
+}
+
+// TODO Support derivative tensors
+void DotDumper::dump(const compiler::ILoweredGraph &lowered_graph, const std::string &tag)
+{
+ if (_level == Level::OFF)
+ {
+ return;
+ }
+
+ auto dot_operands = generate_dot_operands(lowered_graph.graph(), _level);
+ auto dot_operations = generate_dot_operations(lowered_graph.graph(), dot_operands);
+ update_lower_info(lowered_graph, &dot_operands);
+ update_lower_info(lowered_graph, &dot_operations);
+ dump_to_file(dot_operands, dot_operations, tag);
+}
} // namespace dot
} // namespace dumper
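
The anonymous-namespace backend_to_fillcolor() above relies on a function-local static initialized by an immediately-invoked lambda, so the backend-to-color table is built exactly once (thread-safe since C++11) on first use. Below is a minimal standalone sketch of the same idiom; the backend ids, colors, and function name are made up for illustration and are not part of this change.

#include <string>
#include <unordered_map>

std::string color_for(const std::string &backend_id)
{
  // Built once on the first call; later calls only do the lookup.
  static const auto table = []() {
    std::unordered_map<std::string, std::string> ret;
    ret.emplace("cpu", "lightblue");
    ret.emplace("acl_neon", "palegreen");
    return ret;
  }();
  auto it = table.find(backend_id);
  return it != table.end() ? it->second : "white"; // fallback, like DEFAULT_FILLCOLOR
}
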
diff --git a/runtime/onert/core/src/dumper/dot/DotDumper.h b/runtime/onert/core/src/dumper/dot/DotDumper.h
index fdbca1642..fca5f356c 100644
--- a/runtime/onert/core/src/dumper/dot/DotDumper.h
+++ b/runtime/onert/core/src/dumper/dot/DotDumper.h
@@ -15,7 +15,7 @@
*/
#include "ir/Graph.h"
-#include "compiler/LoweredGraph.h"
+#include "compiler/ILoweredGraph.h"
#ifndef __ONERT_DUMPER_DOT_DOT_DUMPER_H__
#define __ONERT_DUMPER_DOT_DOT_DUMPER_H__
@@ -38,27 +38,28 @@ public:
};
public:
- DotDumper(const ir::Graph &graph, Level level)
- : _lowered_graph{nullptr}, _graph(graph), _level{level}
- {
- }
- DotDumper(const compiler::LoweredGraph *lowered_graph, Level level)
- : _lowered_graph{lowered_graph}, _graph(_lowered_graph->graph()), _level{level}
- {
- }
+ DotDumper(Level level) : _level{level} {}
public:
/**
- * @brief Dump to dot file as tag name if "GRAPH_DOT_DUMP" is set
+ * @brief Dump graph information to dot file as tag name if "GRAPH_DOT_DUMP" is set
+ *
+ * @param[in] graph The graph that would be used to get operations and operands
+ * @param[in] tag The name of dot file that would be created
+ * @return N/A
+ */
+ void dump(const ir::Graph &graph, const std::string &tag);
+
+ /**
+ * @brief Dump lowered graph information to dot file as tag name if "GRAPH_DOT_DUMP" is set
*
+ * @param[in] lowered_graph The lowered graph that would be used to get operations, operands, and lower info
* @param[in] tag The name of dot file that would be created
* @return N/A
*/
- void dump(const std::string &tag);
+ void dump(const compiler::ILoweredGraph &lowered_graph, const std::string &tag);
private:
- const compiler::LoweredGraph *_lowered_graph;
- const ir::Graph &_graph;
Level _level;
};
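
With the refactoring above, DotDumper keeps only the dump level and receives the graph on each call. A hedged usage sketch of the new two-overload interface follows; the include path, function name, and tag strings are placeholders, and the graphs are assumed to be built elsewhere.

#include "dumper/dot/DotDumper.h"

void dump_for_debug(const onert::ir::Graph &graph,
                    const onert::compiler::ILoweredGraph &lowered_graph)
{
  onert::dumper::dot::DotDumper dumper{onert::dumper::dot::DotDumper::Level::ALL};
  dumper.dump(graph, "before_lowering");        // operand/operation nodes only
  dumper.dump(lowered_graph, "after_lowering"); // additionally annotates backend labels and colors
}
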
diff --git a/runtime/onert/core/src/dumper/dot/DotSubgraphInfo.cc b/runtime/onert/core/src/dumper/dot/DotSubgraphInfo.cc
deleted file mode 100644
index 52e9c758d..000000000
--- a/runtime/onert/core/src/dumper/dot/DotSubgraphInfo.cc
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "DotSubgraphInfo.h"
-
-#include <sstream>
-
-namespace onert
-{
-namespace dumper
-{
-namespace dot
-{
-
-DotSubgraphInfo::DotSubgraphInfo(const ir::OpSequenceIndex &index, const ir::OpSequence &op_seq,
- const util::Set<ir::OperandIndex> &shown_operands,
- const ir::Operations &operations_ctx)
- : _index{index}
-{
- for (const auto &op_idx : op_seq.operations())
- {
- _operations.insert(op_idx);
- const auto &node = operations_ctx.at(op_idx);
- for (auto o : node.getInputs())
- {
- // Must be a shown operand, not op_seq's inputs
- if (shown_operands.contains(o) && !op_seq.getInputs().contains(o))
- {
- _operands.insert(o);
- }
- }
- for (auto o : node.getOutputs())
- {
- // Must be a shown operand, not op_seq's inputs
- if (shown_operands.contains(o) && !op_seq.getOutputs().contains(o))
- {
- _operands.insert(o);
- }
- }
- }
-}
-
-} // namespace dot
-} // namespace dumper
-} // namespace onert
diff --git a/runtime/onert/core/src/dumper/dot/DotSubgraphInfo.h b/runtime/onert/core/src/dumper/dot/DotSubgraphInfo.h
deleted file mode 100644
index 95ba8953e..000000000
--- a/runtime/onert/core/src/dumper/dot/DotSubgraphInfo.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_CORE_DUMPER_DOT_DOT_SUBGRAPH_INFO_H__
-#define __ONERT_CORE_DUMPER_DOT_DOT_SUBGRAPH_INFO_H__
-
-#include <unordered_set>
-
-#include "ir/Index.h"
-#include <ir/Operations.h>
-#include "ir/OpSequence.h"
-#include "util/Set.h"
-
-namespace onert
-{
-namespace dumper
-{
-namespace dot
-{
-
-class DotSubgraphInfo
-{
-public:
- DotSubgraphInfo(const ir::OpSequenceIndex &index, const ir::OpSequence &op_seq,
- const util::Set<ir::OperandIndex> &shown_operands,
- const ir::Operations &operations_ctx);
-
- ir::OpSequenceIndex index() const { return _index; }
- std::string label() const { return _label; }
- void label(const std::string &val) { _label = val; }
- std::string fillcolor() const { return _fillcolor; }
- void fillcolor(const std::string &val) { _fillcolor = val; }
- const std::unordered_set<ir::OperationIndex> &operations() const { return _operations; }
- const std::unordered_set<ir::OperandIndex> &operands() const { return _operands; }
-
-private:
- ir::OpSequenceIndex _index;
- std::string _label;
- std::string _fillcolor;
- std::unordered_set<ir::OperationIndex> _operations;
- std::unordered_set<ir::OperandIndex> _operands;
-};
-
-} // namespace dot
-} // namespace dumper
-} // namespace onert
-
-#endif // __ONERT_CORE_DUMPER_DOT_DOT_SUBGRAPH_INFO_H__
diff --git a/runtime/onert/core/src/dumper/dot/OperandNode.cc b/runtime/onert/core/src/dumper/dot/OperandNode.cc
index 5a6015ca9..49319d595 100644
--- a/runtime/onert/core/src/dumper/dot/OperandNode.cc
+++ b/runtime/onert/core/src/dumper/dot/OperandNode.cc
@@ -18,7 +18,6 @@
#include "OperandNode.h"
#include "ir/Graph.h"
-#include "ir/operand/LowerInfo.h"
namespace onert
{
@@ -33,10 +32,10 @@ const std::string Operand::OPERAND_SHAPE = "ellipse";
const std::string Operand::BG_COLOR_SCHEME = "set18";
Operand::Operand(const ir::OperandIndex &index, Type type)
- : Node{"operand" + std::to_string(index.value())}
+ : Node{"operand" + std::to_string(index.value())}
{
{
- auto type_to_shape = [](Type type) {
+ auto type_to_shape = [](Type type) -> const auto & {
switch (type)
{
case Type::MODEL_INPUT:
diff --git a/runtime/onert/core/src/dumper/dot/OperandNode.h b/runtime/onert/core/src/dumper/dot/OperandNode.h
index 2e7cc5861..f2aea80ad 100644
--- a/runtime/onert/core/src/dumper/dot/OperandNode.h
+++ b/runtime/onert/core/src/dumper/dot/OperandNode.h
@@ -64,7 +64,6 @@ public:
*
* @param[in] index Operand index
* @param[in] type Operand type
- * @param[in] lower_info Operand LowerInfo
*/
Operand(const ir::OperandIndex &index, Type type);
diff --git a/runtime/onert/core/src/dumper/dot/OperationNode.cc b/runtime/onert/core/src/dumper/dot/OperationNode.cc
index bee137e7c..2ef08c9c6 100644
--- a/runtime/onert/core/src/dumper/dot/OperationNode.cc
+++ b/runtime/onert/core/src/dumper/dot/OperationNode.cc
@@ -18,7 +18,6 @@
#include "OperationNode.h"
#include "ir/Graph.h"
-#include "ir/operation/LowerInfo.h"
#include "backend/IConfig.h"
#include "backend/Backend.h"
@@ -32,8 +31,8 @@ namespace dot
const std::string Operation::OPERATION_SHAPE = "rect";
const std::string Operation::BG_COLOR_SCHEME = "pastel18";
-Operation::Operation(const ir::OperationIndex &index, const ir::Operation &node)
- : Node{"operation" + std::to_string(index.value())}
+Operation::Operation(const ir::OperationIndex &index, const ir::IOperation &node)
+ : Node{"operation" + std::to_string(index.value())}
{
setAttribute("label", std::to_string(index.value()) + " : " + node.name());
setAttribute("shape", OPERATION_SHAPE);
diff --git a/runtime/onert/core/src/dumper/dot/OperationNode.h b/runtime/onert/core/src/dumper/dot/OperationNode.h
index 74a37d3fb..d9292ad0c 100644
--- a/runtime/onert/core/src/dumper/dot/OperationNode.h
+++ b/runtime/onert/core/src/dumper/dot/OperationNode.h
@@ -25,7 +25,7 @@
#define __ONERT_DUMPER_DOT_DOT_NODE_INFO_H__
#include "Node.h"
-#include "ir/Operation.h"
+#include "ir/IOperation.h"
#include "ir/Index.h"
namespace onert
@@ -52,7 +52,7 @@ public:
* @param[in] index operation index
* @param[in] node operation object
*/
- Operation(const ir::OperationIndex &index, const ir::Operation &node);
+ Operation(const ir::OperationIndex &index, const ir::IOperation &node);
};
} // namespace dot
diff --git a/runtime/onert/core/src/compiler/ParamChecker.cc b/runtime/onert/core/src/dumper/h5/Dumper.cc
index c4f80f087..5e12c2dbb 100644
--- a/runtime/onert/core/src/compiler/ParamChecker.cc
+++ b/runtime/onert/core/src/dumper/h5/Dumper.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,20 +14,21 @@
* limitations under the License.
*/
-#include "ParamChecker.h"
+#include "Dumper.h"
-#include "ir/Graph.h"
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
namespace onert
{
-namespace compiler
+namespace dumper
{
-
-void ParamChecker::operator()()
+namespace h5
{
- _model->operations().iterate(
- [&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); });
-}
-} // namespace compiler
+Dumper::Dumper(const std::string &filepath) : _file{filepath, H5F_ACC_CREAT | H5F_ACC_RDWR} {}
+
+} // namespace h5
+} // namespace dumper
} // namespace onert
diff --git a/runtime/onert/core/src/dumper/h5/Dumper.h b/runtime/onert/core/src/dumper/h5/Dumper.h
new file mode 100644
index 000000000..53d0e0332
--- /dev/null
+++ b/runtime/onert/core/src/dumper/h5/Dumper.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_DUMPER_H5_DUMPER_H__
+#define __ONERT_DUMPER_H5_DUMPER_H__
+
+#include "exec/MinMaxMap.h"
+
+#include <H5Cpp.h>
+#include <string>
+
+namespace onert
+{
+namespace dumper
+{
+namespace h5
+{
+
+class Dumper
+{
+public:
+ /**
+ * @brief Construct dumper
+ *
+ * @param[in] filepath Path of the file to dump into
+ * @throw H5::FileIException on error during file open/create
+ */
+ Dumper(const std::string &filepath);
+
+protected:
+ H5::H5File _file;
+};
+
+} // namespace h5
+} // namespace dumper
+} // namespace onert
+
+#endif // __ONERT_DUMPER_H5_DUMPER_H__
diff --git a/runtime/onert/core/src/dumper/h5/MinMaxDumper.cc b/runtime/onert/core/src/dumper/h5/MinMaxDumper.cc
new file mode 100644
index 000000000..8a9de9f95
--- /dev/null
+++ b/runtime/onert/core/src/dumper/h5/MinMaxDumper.cc
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MinMaxDumper.h"
+
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
+
+namespace onert
+{
+namespace dumper
+{
+namespace h5
+{
+
+static const char *h5_value_grpname = "value";
+
+/*
+ * Ensure the child group exists in parent: open it if present, otherwise create it
+ */
+H5::Group ensureGroup(H5::Group parent, const char *child)
+{
+ H5::Exception::dontPrint();
+ try
+ {
+ return parent.openGroup(child);
+ }
+ catch (H5::Exception &e)
+ {
+ return parent.createGroup(child);
+ }
+}
+
+MinMaxDumper::MinMaxDumper(const std::string &filepath) : Dumper(filepath)
+{
+ auto root_grp = _file.openGroup("/");
+ ensureGroup(root_grp, h5_value_grpname);
+}
+
+void MinMaxDumper::dump(const exec::SMMinMaxMap &mmmap) const
+{
+ auto val_grp = _file.openGroup(h5_value_grpname);
+ auto num_run = val_grp.getNumObjs();
+ auto num_grp = val_grp.createGroup(std::to_string(num_run));
+ auto model_grp = ensureGroup(num_grp, "0");
+ hsize_t dims[] = {2};
+ H5::DataSpace dspace(1, dims); // rank=1, dim(0)=2, {min, max}
+ for (auto &&e : mmmap)
+ {
+ // key = {subg_idx, op_idx} = e.first
+ const auto subg_idx = e.first.first.value();
+ const auto op_idx = e.first.second.value();
+ auto subg_grp = ensureGroup(model_grp, std::to_string(subg_idx).c_str());
+ auto op_dset = subg_grp.createDataSet(std::to_string(op_idx), H5::PredType::IEEE_F32BE, dspace);
+ op_dset.write(e.second.data, H5::PredType::NATIVE_FLOAT);
+ }
+}
+
+} // namespace h5
+} // namespace dumper
+} // namespace onert
diff --git a/runtime/onert/core/src/dumper/h5/MinMaxDumper.h b/runtime/onert/core/src/dumper/h5/MinMaxDumper.h
new file mode 100644
index 000000000..1f1b27c6e
--- /dev/null
+++ b/runtime/onert/core/src/dumper/h5/MinMaxDumper.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_DUMPER_H5_MINMAX_DUMPER_H__
+#define __ONERT_DUMPER_H5_MINMAX_DUMPER_H__
+
+#include "exec/MinMaxMap.h"
+#include "Dumper.h"
+
+#include <H5Cpp.h>
+#include <string>
+
+namespace onert
+{
+namespace dumper
+{
+namespace h5
+{
+
+// The hierarchy of a single-model minmax h5 file
+//
+// GROUP /
+// GROUP value
+// └── GROUP run_idx
+// └── GROUP model_idx
+// └── GROUP subg_idx
+// └── DATASET op_idx
+// DATATYPE Float32
+// DATASPACE (2)
+// DATA { min, max }
+// GROUP name (optional, for debug)
+// └── GROUP model_idx
+// └── GROUP subg_idx
+// └── ATTRIBUTE op_idx
+// DATATYPE String
+// DATA { "model/your/op/name"}
+//
+class MinMaxDumper : private Dumper
+{
+public:
+ MinMaxDumper(const std::string &filepath);
+ /**
+ * @brief Dump minmax map
+ *
+ * @param[in] map single model minmax map
+ */
+ void dump(const exec::SMMinMaxMap &map) const;
+
+private:
+ H5::Group _val_grp;
+};
+
+} // namespace h5
+} // namespace dumper
+} // namespace onert
+
+#endif // __ONERT_DUMPER_H5_MINMAX_DUMPER_H__
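
For reference, the hierarchy documented above can be read back with the plain HDF5 C++ API. This is only a sketch: the file name and the run/model/subgraph/op indices are placeholders, and the code is not part of the runtime.

#include <H5Cpp.h>
#include <iostream>

int main()
{
  H5::H5File file("minmax.h5", H5F_ACC_RDONLY);
  // GROUP /value/<run_idx>/<model_idx>/<subg_idx> holds DATASET <op_idx> = {min, max}
  H5::Group subg = file.openGroup("/value/0/0/0");
  H5::DataSet dset = subg.openDataSet("0");
  float minmax[2] = {0.0f, 0.0f};
  dset.read(minmax, H5::PredType::NATIVE_FLOAT); // stored as IEEE_F32BE, converted on read
  std::cout << "min=" << minmax[0] << " max=" << minmax[1] << std::endl;
  return 0;
}
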
diff --git a/runtime/onert/core/src/dumper/text/GraphDumper.cc b/runtime/onert/core/src/dumper/text/GraphDumper.cc
new file mode 100644
index 000000000..6bd7904aa
--- /dev/null
+++ b/runtime/onert/core/src/dumper/text/GraphDumper.cc
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GraphDumper.h"
+
+#include "ir/Graph.h"
+#include "compiler/LoweredGraph.h"
+#ifdef ONERT_TRAIN
+#include "compiler/train/LoweredTrainableGraph.h"
+#endif // ONERT_TRAIN
+#include "util/logging.h"
+#include "misc/string_helpers.h"
+
+namespace onert
+{
+namespace dumper
+{
+namespace text
+{
+
+namespace
+{
+
+std::string formatOperandIndexSequence(const ir::OperandIndexSequence &seq)
+{
+ std::vector<std::string> strs;
+ for (auto &&ind : seq)
+ strs.push_back(dumper::text::formatOperandBrief(ind));
+ return nnfw::misc::join(strs.begin(), strs.end(), ", ");
+}
+
+} // namespace
+
+std::string formatOperandBrief(ir::OperandIndex ind)
+{
+ std::stringstream ss;
+ ss << ind;
+ return ss.str();
+}
+
+std::string formatOperand(const ir::Graph &, ir::OperandIndex ind)
+{
+ std::stringstream ss;
+ ss << ind;
+ // TODO Print shape, type and maybe more
+ return ss.str();
+}
+
+std::string formatOperation(const ir::IOperation &op, ir::OperationIndex ind)
+{
+ std::stringstream ss;
+
+ ss << formatOperandIndexSequence(op.getOutputs());
+ ss << " = ";
+ ss << ind << "_" << op.name() << "(";
+ ss << formatOperandIndexSequence(op.getInputs());
+ ss << ")";
+ return ss.str();
+}
+
+std::string formatOperation(const ir::Graph &graph, ir::OperationIndex ind)
+{
+ std::stringstream ss;
+ const auto &op = graph.operations().at(ind);
+ return formatOperation(op, ind);
+}
+
+void dumpGraph(const ir::Graph &graph)
+{
+ VERBOSE(GraphDumper) << "{\n";
+ auto ops_topol = graph.topolSortOperations();
+ for (auto &&op_ind : ops_topol)
+ {
+ const auto &op = graph.operations().at(op_ind);
+ VERBOSE(GraphDumper) << " " << formatOperation(op, op_ind) << "\n";
+ }
+ VERBOSE(GraphDumper) << "}\n";
+ VERBOSE(GraphDumper) << std::endl;
+}
+
+void dumpLoweredGraph(const compiler::LoweredGraph &lgraph)
+{
+ // TODO Graph dump with backend info
+ dumpGraph(lgraph.graph());
+}
+
+#ifdef ONERT_TRAIN
+void dumpLoweredGraph(const compiler::train::LoweredTrainableGraph &lgraph)
+{
+ // TODO Graph dump with backend info
+ dumpGraph(lgraph.graph());
+}
+#endif // ONERT_TRAIN
+
+} // namespace text
+} // namespace dumper
+} // namespace onert
diff --git a/runtime/onert/core/src/dumper/text/GraphDumper.h b/runtime/onert/core/src/dumper/text/GraphDumper.h
new file mode 100644
index 000000000..ab0061465
--- /dev/null
+++ b/runtime/onert/core/src/dumper/text/GraphDumper.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_DUMPER_TEXT_GRAPH_DUMPER_H__
+#define __ONERT_DUMPER_TEXT_GRAPH_DUMPER_H__
+
+#include <ir/Index.h>
+
+namespace onert
+{
+namespace ir
+{
+class Graph;
+struct IOperation;
+} // namespace ir
+} // namespace onert
+
+namespace onert
+{
+namespace compiler
+{
+class LoweredGraph;
+
+#ifdef ONERT_TRAIN
+namespace train
+{
+class LoweredTrainableGraph;
+} // namespace train
+#endif // ONERT_TRAIN
+} // namespace compiler
+} // namespace onert
+
+namespace onert
+{
+namespace dumper
+{
+namespace text
+{
+
+std::string formatOperandBrief(ir::OperandIndex ind);
+std::string formatOperand(const ir::Graph &, ir::OperandIndex ind);
+std::string formatOperation(const ir::Graph &graph, ir::OperationIndex ind);
+void dumpGraph(const ir::Graph &graph);
+void dumpLoweredGraph(const compiler::LoweredGraph &lgraph);
+#ifdef ONERT_TRAIN
+void dumpLoweredGraph(const compiler::train::LoweredTrainableGraph &lgraph);
+#endif // ONERT_TRAIN
+
+} // namespace text
+} // namespace dumper
+} // namespace onert
+
+#endif // __ONERT_DUMPER_TEXT_GRAPH_DUMPER_H__
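
A hedged usage sketch of the text dumper declared above (the include paths and helper name are placeholders; the graph is assumed to be built elsewhere). dumpGraph() walks the operations in topological order and logs each one through VERBOSE as "outputs = <index>_<name>(inputs)", so output only appears when verbose logging is enabled.

#include "dumper/text/GraphDumper.h"
#include "ir/Graph.h"

void debug_dump(const onert::ir::Graph &graph)
{
  // One line per operation, e.g. something like: <outs> = 3_Conv2D(<ins>)
  onert::dumper::text::dumpGraph(graph);
}
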
diff --git a/runtime/onert/core/src/exec/DataflowExecutor.cc b/runtime/onert/core/src/exec/DataflowExecutor.cc
index a69ae9cdb..e0b00077f 100644
--- a/runtime/onert/core/src/exec/DataflowExecutor.cc
+++ b/runtime/onert/core/src/exec/DataflowExecutor.cc
@@ -54,14 +54,13 @@ void DataflowExecutor::emplaceToReadyJobs(const uint32_t &id)
{
auto &job = _waiting_jobs[id];
assert(job != nullptr);
- auto &op_seq = _lowered_graph->op_seqs().at(_job_to_op_seq[job->index()]);
- auto rank = calculateRank(op_seq.operations());
+ auto rank = calculateRank({_job_to_op[job->index()]});
_ready_jobs.emplace(rank, std::move(job));
}
void DataflowExecutor::notify(uint32_t finished_job_id)
{
- for (auto id : _output_info[finished_job_id])
+ for (auto &&id : _output_info[finished_job_id])
{
assert(_input_info[id] > 0);
auto count = --_input_info[id];
@@ -77,52 +76,49 @@ bool DataflowExecutor::noWaitingJobs()
[](const std::unique_ptr<Job> &job) { return job == nullptr; });
}
-DataflowExecutor::DataflowExecutor(
- std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
- const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorRegistries &tensor_regs, backend::TensorManagerSet &&tensor_mgrs,
- compiler::CodeMap &&code_map)
- : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
- std::move(tensor_mgrs)},
- _code_map{std::move(code_map)}
+DataflowExecutor::DataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
+ backend::BackendContexts &&backend_contexts,
+ const compiler::TensorRegistries &tensor_regs,
+ compiler::CodeMap &&code_map,
+ const util::TracingCtx *tracing_ctx)
+ : ExecutorBase{std::move(lowered_graph), std::move(backend_contexts), tensor_regs, tracing_ctx},
+ _code_map{std::move(code_map)}
{
VERBOSE(DataflowExecutor) << "Constructing Dataflow Executor" << std::endl;
- const auto &op_seqs = _lowered_graph->op_seqs();
- // Assign jobs convert OpSequenceIndex to job index(uint32_t)
+ // Assign jobs: convert each OperationIndex to a job index (uint32_t)
uint32_t next_job_index = 0;
- std::unordered_map<ir::OpSequenceIndex, uint32_t> op_seq_to_job;
- op_seqs.iterate([&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &) {
- VERBOSE(DataflowExecutor) << "Create a job #" << next_job_index << " with OpSequenceIndex "
- << op_seq_index.value() << std::endl;
+ std::unordered_map<ir::OperationIndex, uint32_t> op_to_job;
+ const auto &operations = _lowered_graph->graph().operations();
+ operations.iterate([&](const ir::OperationIndex &op_ind, const ir::IOperation &) {
+ VERBOSE(DataflowExecutor) << "Create a job " << next_job_index << " with Operation " << op_ind
+ << std::endl;
_finished_jobs.emplace_back(
- std::make_unique<Job>(next_job_index, _code_map.at(op_seq_index).fn_seq.get()));
- op_seq_to_job[op_seq_index] = next_job_index++;
+ std::make_unique<Job>(next_job_index, _code_map.at(op_ind).fn_seq.get()));
+ op_to_job[op_ind] = next_job_index++;
});
_waiting_jobs.resize(next_job_index);
_output_info.resize(next_job_index);
_initial_input_info.resize(next_job_index, 0);
- op_seqs.iterate([&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) {
- auto job_index = op_seq_to_job[op_seq_index];
- for (auto output : op_seq.getOutputs())
+ operations.iterate([&](const ir::OperationIndex &op_ind, const ir::IOperation &op) {
+ auto job_index = op_to_job[op_ind];
+ for (auto &&output : op.getOutputs())
{
// Update output and input info
- op_seqs.iterate(
- [&](const ir::OpSequenceIndex &op_seq_cur_index, const ir::OpSequence &op_seq_cur) {
- if (op_seq_cur.getInputs().contains(output))
- {
- auto dep_index = op_seq_to_job[op_seq_cur_index];
- ++_initial_input_info[dep_index];
- _output_info[job_index].push_back(dep_index);
- }
- });
+ operations.iterate([&](const ir::OperationIndex &op_cur_ind, const ir::IOperation &op_cur) {
+ if (op_cur.getInputs().contains(output))
+ {
+ auto dep_index = op_to_job[op_cur_ind];
+ ++_initial_input_info[dep_index];
+ _output_info[job_index].push_back(dep_index);
+ }
+ });
}
});
- for (const auto &s : op_seq_to_job)
- _job_to_op_seq.emplace(s.second, s.first);
+ for (const auto &s : op_to_job)
+ _job_to_op.emplace(s.second, s.first);
_input_info = _initial_input_info;
}
@@ -145,35 +141,38 @@ void DataflowExecutor::executeImpl()
}
assert(!_ready_jobs.empty()); // Cannot begin if there is no initial jobs
- _subject.notifyModelBegin(this);
+ auto profiling_subg_index = _tracing_ctx->getSubgraphIndex(&_graph);
+
+ _subject.notifySubgraphBegin(profiling_subg_index);
while (!_ready_jobs.empty())
{
auto job = std::move((_ready_jobs.begin())->second);
_ready_jobs.erase(_ready_jobs.begin());
auto job_index = job->index();
- VERBOSE(DataflowExecutor) << "Run job #" << job_index << std::endl;
+ VERBOSE(DataflowExecutor) << "Run job " << job_index << std::endl;
+
+ auto op_ind = _job_to_op[job_index];
+ const backend::Backend *backend = _lowered_graph->lower_info().operation.at(op_ind).backend();
- auto op_seq_index = _job_to_op_seq[job_index];
- auto op_seq = &_lowered_graph->op_seqs().at(op_seq_index);
- const backend::Backend *backend =
- _lowered_graph->getLowerInfo()->op_seq.at(op_seq_index)->backend();
+ _subject.notifyJobBegin(this, profiling_subg_index, op_ind, backend);
- _subject.notifyJobBegin(this, op_seq, backend);
+ job->fn_seq()->initRunning();
// check if FunctionSequence needs to handle dynamic tensor
- bool handle_dynamic_tensor = op_seq->has_dynamic_tensor() || dynamic_input_exists;
+ bool handle_dynamic_tensor =
+ _lowered_graph->getHasDynamicTensor(op_ind) || dynamic_input_exists;
job->fn_seq()->enableDynamicShapeInferer(handle_dynamic_tensor);
job->run();
- _subject.notifyJobEnd(this, op_seq, backend);
+ _subject.notifyJobEnd(this, profiling_subg_index, op_ind, backend);
notify(job_index);
_finished_jobs[job_index] = std::move(job);
}
assert(noWaitingJobs());
- _subject.notifyModelEnd(this);
+ _subject.notifySubgraphEnd(profiling_subg_index);
// Reset input info for the next execution
_input_info = _initial_input_info;
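
The constructor above boils the graph down to plain dependency counters: op_to_job maps each operation to a job, _output_info[j] lists the jobs that consume job j's outputs, and _initial_input_info[j] counts how many producers must finish before job j becomes ready; notify() decrements those counters at runtime. Below is a self-contained sketch of that counting scheme, with plain integers instead of Job/Operation objects and a FIFO instead of the rank-ordered multimap; all values are illustrative.

#include <cstdint>
#include <iostream>
#include <queue>
#include <vector>

int main()
{
  // Job j unblocks the jobs listed in unblocks[j] (like _output_info).
  std::vector<std::vector<uint32_t>> unblocks = {{2}, {2}, {3}, {}};
  // Remaining unfinished producers per job (like _initial_input_info / _input_info).
  std::vector<uint32_t> pending(unblocks.size(), 0);
  for (const auto &outs : unblocks)
    for (auto dep : outs)
      ++pending[dep];

  std::queue<uint32_t> ready;
  for (uint32_t j = 0; j < pending.size(); ++j)
    if (pending[j] == 0)
      ready.push(j);

  while (!ready.empty())
  {
    auto job = ready.front();
    ready.pop();
    std::cout << "run job " << job << std::endl; // stand-in for job->run()
    for (auto dep : unblocks[job])               // stand-in for notify(job_index)
      if (--pending[dep] == 0)
        ready.push(dep);
  }
  return 0;
}
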
diff --git a/runtime/onert/core/src/exec/DataflowExecutor.h b/runtime/onert/core/src/exec/DataflowExecutor.h
index 8d60e3e4b..1649be733 100644
--- a/runtime/onert/core/src/exec/DataflowExecutor.h
+++ b/runtime/onert/core/src/exec/DataflowExecutor.h
@@ -17,17 +17,17 @@
#ifndef __ONERT_EXEC_DATAFLOW_EXECUTOR_H__
#define __ONERT_EXEC_DATAFLOW_EXECUTOR_H__
-#include <list>
-#include <map>
-#include <unordered_map>
-
-#include "exec/FunctionSequence.h"
+#include "ExecutorBase.h"
#include "Job.h"
+
+#include "compiler/CodeMap.h"
#include "ir/OperandIndexSequence.h"
-#include "ir/Index.h"
+#include "util/TracingCtx.h"
+
+#include <list>
+#include <map>
#include <memory>
-#include "exec/ExecutorBase.h"
-#include "compiler/CodeMap.h"
+#include <unordered_map>
namespace onert
{
@@ -47,13 +47,12 @@ public:
*
* @param lowered_graph LoweredGraph object
* @param tensor_builders Tensor builders that are currently used
- * @param code_map OpSequence and its code map
+ * @param code_map @c ir::Operation and its code map
*/
DataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
- const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorRegistries &tensor_regs,
- backend::TensorManagerSet &&tensor_mgrs, compiler::CodeMap &&code_map);
+ backend::BackendContexts &&backend_contexts,
+ const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map,
+ const util::TracingCtx *tracing_ctx);
void executeImpl() override;
@@ -88,7 +87,7 @@ protected:
std::multimap<int64_t, std::unique_ptr<Job>, std::greater<int64_t>> _ready_jobs;
/// @brief Which job runs which op and function.
- std::unordered_map<uint32_t, ir::OpSequenceIndex> _job_to_op_seq;
+ std::unordered_map<uint32_t, ir::OperationIndex> _job_to_op;
};
} // namespace exec
diff --git a/runtime/onert/core/src/exec/DynamicShapeInference.cc b/runtime/onert/core/src/exec/DynamicShapeInferer.cc
index 70bddfce4..4cbf2fe64 100644
--- a/runtime/onert/core/src/exec/DynamicShapeInference.cc
+++ b/runtime/onert/core/src/exec/DynamicShapeInferer.cc
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "exec/DynamicShapeInference.h"
+#include "exec/DynamicShapeInferer.h"
#include "util/ShapeInference.h"
#include <assert.h>
@@ -23,14 +23,6 @@ namespace onert
namespace exec
{
-inline backend::IDynamicTensorManager *
-dynamicTensorManagerOf(const std::shared_ptr<backend::ITensor> &tensor)
-{
- if (!tensor->dynamic_tensor_manager())
- throw std::runtime_error{"Dynamic Tensor Manager is not available for this tensor."};
- return tensor->dynamic_tensor_manager();
-}
-
void DynamicShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op,
const ir::OperandIndex lhs_idx,
const ir::OperandIndex rhs_idx)
@@ -56,15 +48,15 @@ void DynamicShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op,
So, only when all inputs are static, we can skip dynamic shape inference.
*/
- if ((!lhs->is_dynamic()) && (!rhs->is_dynamic()))
- return;
-
auto output_idx = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_idx);
+ if ((currently_static(lhs) && currently_static(rhs)) && previously_static(output))
+ return;
+
ir::Shape new_shape = shape_inference::inferEltwiseShape(lhs_shape, rhs_shape);
- dynamicTensorManagerOf(output)->applyShape(output_idx, new_shape);
+ output->applyShape(new_shape);
assert(output->buffer() != nullptr);
}
@@ -96,30 +88,32 @@ void DynamicShapeInferer::handleSimpleUnaryOp(const ir::Operation &op,
auto output_ind = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_ind);
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
+ output->applyShape(output_shape);
assert(output->buffer() != nullptr);
}
-void DynamicShapeInferer::visit(const ir::operation::ArgMax &op)
+void DynamicShapeInferer::visit(const ir::operation::ArgMinMax &op)
{
- const auto input_idx{op.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
- const auto &input = _tensor_registry->getITensor(input_idx);
- auto input_shape = input->getShape();
+ const auto input_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)};
+ const auto input = _tensor_registry->getITensor(input_idx);
- if (!input->is_dynamic())
- return;
-
- const auto rank = input_shape.rank();
- const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
-
- assert(0 <= axis && axis < rank);
+ const auto axis_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)};
+ const auto axis = _tensor_registry->getITensor(axis_idx);
auto output_ind = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_ind);
- ir::Shape new_shape = shape_inference::inferArgMaxShape(input_shape, axis, rank);
+ if (!input->is_dynamic() && !output->is_dynamic())
+ return;
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
+ auto input_shape = input->getShape();
+ auto axis_value = *reinterpret_cast<const int32_t *>(axis->buffer());
+ const auto rank = input_shape.rank();
+ axis_value = axis_value < 0 ? axis_value + rank : axis_value;
+
+ ir::Shape new_shape = shape_inference::inferArgMinMaxShape(input_shape, axis_value, rank);
+
+ output->applyShape(new_shape);
assert(output->buffer() != nullptr);
}
@@ -141,7 +135,68 @@ void DynamicShapeInferer::visit(const ir::operation::BatchMatMul &op)
// TODO
auto new_shape = shape_inference::inferBatchMatMulShape(lhs_shape, rhs_shape, op.param());
- dynamicTensorManagerOf(output)->applyShape(output_index, new_shape);
+ output->applyShape(new_shape);
+}
+
+void DynamicShapeInferer::visit(const ir::operation::BCQFullyConnected &op)
+{
+ const auto input_idx{op.getInputs().at(ir::operation::BCQFullyConnected::Input::INPUT)};
+ const auto &input = _tensor_registry->getITensor(input_idx);
+
+ const auto cluster_idx{
+ op.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)};
+ const auto &cluster = _tensor_registry->getITensor(cluster_idx);
+ assert(cluster->is_constant());
+
+ if (!input->is_dynamic())
+ return;
+
+ auto input_shape = input->getShape();
+ auto cluster_shape = cluster->getShape();
+
+ auto cluster_buf = reinterpret_cast<const int32_t *>(cluster->buffer());
+ assert(cluster_buf);
+
+ ir::Shape new_shape =
+ shape_inference::inferBCQFullyConnectedShape(input_shape, cluster_shape, cluster_buf);
+
+ auto output_ind = op.getOutputs().at(0);
+ auto output = _tensor_registry->getITensor(output_ind);
+
+ output->applyShape(new_shape);
+ assert(output->buffer() != nullptr);
+}
+
+void DynamicShapeInferer::visit(const ir::operation::BCQGather &op)
+{
+ const auto indices_idx{op.getInputs().at(ir::operation::BCQGather::Input::INDICES)};
+ const auto &indices = _tensor_registry->getITensor(indices_idx);
+
+ const auto input_binary_idx{op.getInputs().at(ir::operation::BCQGather::Input::INPUT_BINARY)};
+ const auto &input_binary = _tensor_registry->getITensor(input_binary_idx);
+
+ const auto cluster_idx{op.getInputs().at(ir::operation::BCQGather::Input::INPUT_CLUSTERS)};
+ const auto &cluster = _tensor_registry->getITensor(cluster_idx);
+ assert(cluster->is_constant());
+
+ if (!indices->is_dynamic())
+ return;
+
+ auto indices_shape = indices->getShape();
+ auto cluster_shape = cluster->getShape();
+ auto rank = input_binary->getShape().rank();
+
+ auto cluster_buf = reinterpret_cast<const int32_t *>(cluster->buffer());
+ assert(cluster_buf);
+
+ ir::Shape new_shape = shape_inference::inferBCQGatherShape(indices_shape, cluster_shape,
+ cluster_buf, rank, op.param());
+
+ auto output_ind = op.getOutputs().at(0);
+ auto output = _tensor_registry->getITensor(output_ind);
+
+ output->applyShape(new_shape);
+ assert(output->buffer() != nullptr);
}
void DynamicShapeInferer::visit(const ir::operation::BinaryArithmetic &op)
@@ -167,10 +222,10 @@ void DynamicShapeInferer::visit(const ir::operation::BroadcastTo &op)
assert(shape); // It shouldn't be 0.
auto output_shape = shape_inference::inferBroadcastToShape(
- shape->getShape(), reinterpret_cast<const int32_t *>(shape->buffer()));
+ shape->getShape(), reinterpret_cast<const int32_t *>(shape->buffer()));
// set output shape and output buffer
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
+ output->applyShape(output_shape);
assert(output->buffer() != nullptr);
}
@@ -198,7 +253,7 @@ void DynamicShapeInferer::visit(const ir::operation::Concat &op)
So, only when all inputs are static, we can skip dynamic shape inference.
*/
bool all_static = true;
- for (auto input_ind : op.getInputs())
+ for (auto &&input_ind : op.getInputs())
{
auto input = _tensor_registry->getITensor(input_ind);
if (input->is_dynamic())
@@ -215,15 +270,17 @@ void DynamicShapeInferer::visit(const ir::operation::Concat &op)
{
auto isConcatible = [](const backend::ITensor *input1, const backend::ITensor *input2,
int32_t axis) {
- if (input1->num_dimensions() != input2->num_dimensions())
+ auto shape1 = input1->getShape();
+ auto shape2 = input2->getShape();
+ if (shape1.rank() != shape2.rank())
return false;
- for (size_t i = 0; i < input1->num_dimensions(); i++)
+ for (int i = 0; i < shape1.rank(); i++)
{
- auto positive_axis = (axis >= 0) ? axis : axis + input1->num_dimensions();
+ auto positive_axis = (axis >= 0) ? axis : axis + input1->getShape().rank();
if (i != positive_axis)
- if (input1->dimension(i) != input2->dimension(i))
+ if (shape1.dim(i) != shape2.dim(i))
return false;
}
@@ -233,17 +290,17 @@ void DynamicShapeInferer::visit(const ir::operation::Concat &op)
auto first_input_ind = op.getInputs().at(0);
auto first_input = _tensor_registry->getITensor(first_input_ind);
- for (auto input_ind : op.getInputs())
+ for (auto &&input_ind : op.getInputs())
{
auto input = _tensor_registry->getITensor(input_ind);
- if (input != first_input && !isConcatible(first_input.get(), input.get(), op.param().axis))
+ if (input != first_input && !isConcatible(first_input, input, op.param().axis))
throw std::runtime_error("input shapes does not matched for concat");
}
}
// getting output shape
onert::shape_inference::Shapes in_shapes;
- for (auto input_ind : op.getInputs())
+ for (auto &&input_ind : op.getInputs())
{
auto input = _tensor_registry->getITensor(input_ind);
ir::Shape shape = input->getShape();
@@ -255,7 +312,7 @@ void DynamicShapeInferer::visit(const ir::operation::Concat &op)
auto output = _tensor_registry->getITensor(output_ind);
auto output_shape = shape_inference::inferConcatShape(in_shapes, op.param());
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
+ output->applyShape(output_shape);
}
void DynamicShapeInferer::visit(const ir::operation::Conv2D &op)
@@ -278,7 +335,7 @@ void DynamicShapeInferer::visit(const ir::operation::Conv2D &op)
ir::Shape output_shape = shape_inference::inferConv2DShape(input_shape, ker_shape, op.param());
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
+ output->applyShape(output_shape);
assert(output->buffer() != nullptr);
}
@@ -333,12 +390,18 @@ void DynamicShapeInferer::visit(const ir::operation::ExpandDims &op)
auto axis_ind = op.getInputs().at(ir::operation::ExpandDims::AXIS);
auto axis = _tensor_registry->getITensor(axis_ind);
- auto axis_buf = reinterpret_cast<const int32_t *>(axis->buffer());
- assert(axis_buf);
+ auto axis_type = axis->data_type();
+ assert(axis_type == ir::DataType::INT32 || axis_type == ir::DataType::INT64);
+
+ assert(axis->buffer());
+ int32_t axis_value =
+ (axis_type == ir::DataType::INT32)
+ ? reinterpret_cast<const int32_t *>(axis->buffer())[0]
+ : static_cast<int32_t>(reinterpret_cast<const int64_t *>(axis->buffer())[0]);
- auto output_shape = shape_inference::inferExpandDimsShape(input_shape, axis_buf[0]);
+ auto output_shape = shape_inference::inferExpandDimsShape(input_shape, axis_value);
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
+ output->applyShape(output_shape);
assert(output->buffer() != nullptr);
}
@@ -347,21 +410,26 @@ void DynamicShapeInferer::visit(const ir::operation::Fill &op)
// check if output is not dynamic
auto output_ind = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_ind);
- auto input_ind = op.getInputs().at(ir::operation::Fill::Input::INPUT);
- auto input = _tensor_registry->getITensor(input_ind);
- ir::Shape input_shape = input->getShape();
+ auto shape_ind = op.getInputs().at(ir::operation::Fill::Input::SHAPE);
+ auto shape = _tensor_registry->getITensor(shape_ind);
- if ((!input->is_dynamic()) && (!output->is_dynamic()))
+ if ((!shape->is_dynamic()) && (!output->is_dynamic()))
return;
- assert(input.get()->data_type() == ir::DataType::INT32);
+ const auto dims_type = shape->data_type();
+ assert(dims_type == ir::DataType::INT32 || dims_type == ir::DataType::INT64);
- auto input_buf = reinterpret_cast<const int32_t *>(input->buffer());
- assert(input_buf);
+ auto dims_buf = shape->buffer();
+ assert(dims_buf);
- auto output_shape = shape_inference::inferFillShape(input_shape, input_buf);
+ const auto &dims_shape = shape->getShape();
+ const auto &output_shape = ((dims_type == ir::DataType::INT32)
+ ? shape_inference::inferFillShape<int32_t>(
+ dims_shape, reinterpret_cast<const int32_t *>(dims_buf))
+ : shape_inference::inferFillShape<int64_t>(
+ dims_shape, reinterpret_cast<const int64_t *>(dims_buf)));
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
+ output->applyShape(output_shape);
assert(output->buffer() != nullptr);
}
@@ -384,7 +452,7 @@ void DynamicShapeInferer::visit(const ir::operation::FullyConnected &op)
auto output_ind = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_ind);
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
+ output->applyShape(new_shape);
assert(output->buffer() != nullptr);
}
@@ -416,7 +484,7 @@ void DynamicShapeInferer::visit(const ir::operation::Gather &op)
auto output_ind = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_ind);
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
+ output->applyShape(new_shape);
assert(output->buffer() != nullptr);
}
@@ -425,11 +493,122 @@ void DynamicShapeInferer::visit(const ir::operation::L2Normalization &op)
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::L2Normalization::INPUT));
}
+void DynamicShapeInferer::visit(const ir::operation::LSTM &op)
+{
+ const auto output_index{op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
+ auto output = _tensor_registry->getITensor(output_index);
+
+ const auto output_state_out_index{
+ op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
+
+ const auto cell_state_out_index{op.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
+
+ const auto scratch_buffer_index{op.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
+
+ if (!output->is_dynamic() &&
+ !(_tensor_registry->getITensor(output_state_out_index) != nullptr &&
+ _tensor_registry->getITensor(output_state_out_index)->is_dynamic()) &&
+ !(_tensor_registry->getITensor(cell_state_out_index) != nullptr &&
+ _tensor_registry->getITensor(cell_state_out_index)->is_dynamic()) &&
+ !(_tensor_registry->getITensor(scratch_buffer_index) != nullptr &&
+ _tensor_registry->getITensor(scratch_buffer_index)->is_dynamic()))
+ return;
+
+ const auto input_index{op.getInputs().at(ir::operation::LSTM::Input::INPUT)};
+ const auto input = _tensor_registry->getITensor(input_index);
+ const auto input_shape = input->getShape();
+
+ const auto input_to_output_weights_index{
+ op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
+ const auto input_to_output_weights = _tensor_registry->getITensor(input_to_output_weights_index);
+ const auto input_to_output_weights_shape = input_to_output_weights->getShape();
+
+ const auto recurrent_to_output_weights_index{
+ op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
+ const auto recurrent_to_output_weights =
+ _tensor_registry->getITensor(recurrent_to_output_weights_index);
+ const auto recurrent_to_output_weights_shape = recurrent_to_output_weights->getShape();
+
+ // re-sizing outputs
+ const int n_batch =
+ (input_shape.rank() == 3 && op.param().time_major) ? input_shape.dim(1) : input_shape.dim(0);
+ const int n_cell = input_to_output_weights_shape.dim(0);
+ const int n_output = recurrent_to_output_weights_shape.dim(1);
+ if (input_shape.rank() == 3)
+ {
+ if (op.param().time_major)
+ output->applyShape(ir::Shape{input_shape.dim(0), n_batch, n_output});
+ else
+ output->applyShape(ir::Shape{n_batch, input_shape.dim(1), n_output});
+ }
+ else
+ {
+ assert(input_shape.rank() == 2);
+ output->applyShape(ir::Shape{n_batch, n_output});
+ }
+ assert(output->buffer() != nullptr);
+
+ auto output_state_out = _tensor_registry->getITensor(output_state_out_index);
+ if (output_state_out != nullptr)
+ {
+ output_state_out->applyShape(ir::Shape{n_batch, n_output});
+ assert(output_state_out->buffer() != nullptr);
+ }
+
+ auto cell_state_out = _tensor_registry->getITensor(cell_state_out_index);
+ if (cell_state_out != nullptr)
+ {
+ cell_state_out->applyShape(ir::Shape{n_batch, n_cell});
+ assert(cell_state_out->buffer() != nullptr);
+ }
+
+ auto scratch_buffer = _tensor_registry->getITensor(scratch_buffer_index);
+ if (scratch_buffer != nullptr)
+ {
+ const auto input_to_input_weights_index{
+ op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)};
+ const auto recurrent_to_input_weights_index{
+ op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)};
+
+ const auto input_to_input_weights_shape =
+ _tensor_registry->getITensor(input_to_input_weights_index)->getShape();
+ bool has_input_to_input_weights =
+ input_to_input_weights_shape.dim(0) != 0 && input_to_input_weights_shape.dim(1) != 0;
+
+ const auto recurrent_to_input_weights_shape =
+ _tensor_registry->getITensor(recurrent_to_input_weights_index)->getShape();
+ bool has_recurrent_to_input_weights =
+ recurrent_to_input_weights_shape.dim(0) != 0 && recurrent_to_input_weights_shape.dim(1) != 0;
+
+ // NOTE cell_to_input_weights does not exist in non-peephole mode, even for a regular (non-CIFG) LSTM.
+ // true: no CIFG
+ // false: CIFG
+ bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
+ if (has_cifg_param)
+ {
+ scratch_buffer->applyShape(ir::Shape{n_batch, n_cell * 4});
+ }
+ else
+ {
+ scratch_buffer->applyShape(ir::Shape{n_batch, n_cell * 3});
+ }
+ assert(scratch_buffer->buffer() != nullptr);
+ }
+}
+
void DynamicShapeInferer::visit(const ir::operation::MatrixBandPart &op)
{
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::MatrixBandPart::INPUT));
}
+void DynamicShapeInferer::visit(const ir::operation::DetectionPostProcess & /* op */)
+{
+ // NOTE The shapes of DetectionPostProcess's undefined outputs are decided at compile time
+ // by the static shape inferer.
+ // DetectionPostProcess's output shapes are independent of the input shape
+ // and are decided only by parameter values.
+}
+
void DynamicShapeInferer::visit(const ir::operation::OneHot &op)
{
auto output_ind = op.getOutputs().at(0);
@@ -452,7 +631,7 @@ void DynamicShapeInferer::visit(const ir::operation::OneHot &op)
const auto axis_val = op.param().axis;
ir::Shape new_shape = shape_inference::inferOnehotShape(indices_shape, *depth_buf, axis_val);
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
+ output->applyShape(new_shape);
assert(output->buffer() != nullptr);
}
@@ -488,7 +667,7 @@ void DynamicShapeInferer::visit(const ir::operation::Pack &op)
ir::Shape new_shape = shape_inference::inferPackShape(input_shape, axis, rank, num);
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
+ output->applyShape(new_shape);
assert(output->buffer() != nullptr);
}
@@ -512,10 +691,10 @@ void DynamicShapeInferer::visit(const ir::operation::Pad &op)
assert(pad_buf);
auto output_shape =
- shape_inference::inferPadShape(input->getShape(), pad_buf, pad->getShape().num_elements());
+ shape_inference::inferPadShape(input->getShape(), pad_buf, pad->getShape().num_elements());
// change output shape and reallocate output tensor memory
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
+ output->applyShape(output_shape);
assert(output->buffer() != nullptr);
}
@@ -556,18 +735,18 @@ void DynamicShapeInferer::visit(const ir::operation::Range &op)
if (output->data_type() == ir::DataType::FLOAT32)
{
new_shape =
- shape_inference::inferRangeShape<float>(*reinterpret_cast<float *>(start_tensor->buffer()),
- *reinterpret_cast<float *>(limit_tensor->buffer()),
- *reinterpret_cast<float *>(delta_tensor->buffer()));
+ shape_inference::inferRangeShape<float>(*reinterpret_cast<float *>(start_tensor->buffer()),
+ *reinterpret_cast<float *>(limit_tensor->buffer()),
+ *reinterpret_cast<float *>(delta_tensor->buffer()));
}
else if (output->data_type() == ir::DataType::INT32)
{
new_shape = shape_inference::inferRangeShape<int32_t>(
- *reinterpret_cast<int32_t *>(start_tensor->buffer()),
- *reinterpret_cast<int32_t *>(limit_tensor->buffer()),
- *reinterpret_cast<int32_t *>(delta_tensor->buffer()));
+ *reinterpret_cast<int32_t *>(start_tensor->buffer()),
+ *reinterpret_cast<int32_t *>(limit_tensor->buffer()),
+ *reinterpret_cast<int32_t *>(delta_tensor->buffer()));
}
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
+ output->applyShape(new_shape);
assert(output->buffer() != nullptr);
}
@@ -611,7 +790,7 @@ void DynamicShapeInferer::visit(const ir::operation::Reduce &op)
ir::Shape new_shape = shape_inference::inferReduceShape(input_shape, axes_vec, keep_dims);
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
+ output->applyShape(new_shape);
assert(output->buffer() != nullptr);
}
@@ -659,13 +838,13 @@ void DynamicShapeInferer::visit(const ir::operation::Reshape &op)
assert(new_shape_buf);
auto output_shape = shape_inference::inferReshapeShape(
- new_shape_buf, new_shape->getShape().num_elements(), input->getShape().num_elements());
+ new_shape_buf, new_shape->getShape().num_elements(), input->getShape().num_elements());
// if shape is changed, change output shape and reallocate output tensor memory
if (output_shape != output->getShape() || output->buffer() == nullptr)
{
// change on output shape
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
+ output->applyShape(output_shape);
}
assert(output->buffer() != nullptr);
}
@@ -681,7 +860,7 @@ void DynamicShapeInferer::visit(const ir::operation::Reshape &op)
if (output_shape != output->getShape() || output->buffer() == nullptr)
{
// change on output shape
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
+ output->applyShape(output_shape);
}
assert(output->buffer() != nullptr);
}
@@ -705,14 +884,35 @@ void DynamicShapeInferer::visit(const ir::operation::ResizeBilinear &op)
return;
// getting output shape from input shape and Params
- auto output_shape = shape_inference::inferResizeBilinearShape(
- input->getShape(), op.param().height_out, op.param().width_out);
+ int32_t height_out, width_out;
+ if (op.getInputs().size() == 2)
+ {
+ auto size_ind = op.getInputs().at(ir::operation::ResizeBilinear::Input::SIZE);
+ auto size = _tensor_registry->getITensor(size_ind);
+ if (size->data_type() == ir::DataType::INT32)
+ {
+ auto size_buf = reinterpret_cast<const int32_t *>(size->buffer());
+ height_out = size_buf[0];
+ width_out = size_buf[1];
+ }
+ else
+ {
+ throw std::runtime_error("DynamicShapeInferer ResizeBilinear : Unsupported data type");
+ }
+ }
+ else
+ {
+ height_out = op.param().height_out;
+ width_out = op.param().width_out;
+ }
+ auto output_shape =
+ shape_inference::inferResizeBilinearShape(input->getShape(), height_out, width_out);
// if shape is changed, change output shape and reallocate output tensor memory
if (output_shape != output->getShape() || output->buffer() == nullptr)
{
// change on output shape
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
+ output->applyShape(output_shape);
}
assert(output->buffer() != nullptr);
}
@@ -744,12 +944,12 @@ void DynamicShapeInferer::visit(const ir::operation::Select &op)
// Select output shape
ir::Shape new_shape =
- shape_inference::inferSelectShape(input_cond_shape, input_true_shape, input_false_shape);
+ shape_inference::inferSelectShape(input_cond_shape, input_true_shape, input_false_shape);
auto output_ind = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_ind);
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
+ output->applyShape(new_shape);
assert(output->buffer() != nullptr);
}
@@ -768,7 +968,7 @@ void DynamicShapeInferer::visit(const ir::operation::Shape &op)
ir::Shape output_shape;
output_shape.append(input_shape.rank());
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
+ output->applyShape(output_shape);
assert(output->buffer() != nullptr);
}
@@ -794,7 +994,7 @@ void DynamicShapeInferer::visit(const ir::operation::Slice &op)
ir::Shape new_shape = shape_inference::inferSliceShape(input_shape, begins_buf, sizes_buf);
- dynamicTensorManagerOf(output)->applyShape(output_index, new_shape);
+ output->applyShape(new_shape);
assert(output->buffer() != nullptr);
}
@@ -829,9 +1029,9 @@ void DynamicShapeInferer::visit(const ir::operation::SpaceToBatchND &op)
auto padding_data = reinterpret_cast<int32_t *>(padding->buffer());
ir::Shape new_shape = shape_inference::inferSpaceToBatchNDShape(
- input_shape, block_shape_shape, padding_shape, block_shape_data, padding_data);
+ input_shape, block_shape_shape, padding_shape, block_shape_data, padding_data);
- dynamicTensorManagerOf(output)->applyShape(output_idx, new_shape);
+ output->applyShape(new_shape);
assert(output->buffer() != nullptr);
}
@@ -840,27 +1040,37 @@ void DynamicShapeInferer::visit(const ir::operation::Split &op)
const auto input_idx{op.getInputs().at(ir::operation::Split::Input::INPUT)};
const auto &input = _tensor_registry->getITensor(input_idx);
- if (!input->is_dynamic())
+ // Return if neither the input nor any output tensor is dynamic
+ bool has_dynamic = false;
+ for (const auto &output_idx : op.getOutputs())
+ {
+ auto output = _tensor_registry->getITensor(output_idx);
+ has_dynamic |= output->is_dynamic();
+ }
+ if (!input->is_dynamic() && !has_dynamic)
{
return;
}
auto input_shape = input->getShape();
- const auto axis = op.param().axis;
+ const auto axis_idx{op.getInputs().at(ir::operation::Split::Input::AXIS)};
+ const auto &axis = _tensor_registry->getITensor(axis_idx);
+
+ auto axis_value = *reinterpret_cast<const int32_t *>(axis->buffer());
const auto num_splits = op.param().num_splits;
const auto rank = input_shape.rank();
- auto axis_resolved = axis < 0 ? axis + rank : axis;
+ axis_value = axis_value < 0 ? axis_value + rank : axis_value;
- assert(0 <= axis_resolved && axis_resolved < rank);
+ assert(0 <= axis_value && axis_value < rank);
- ir::Shape new_shape = shape_inference::inferSplitShape(input_shape, axis_resolved, num_splits);
+ ir::Shape new_shape = shape_inference::inferSplitShape(input_shape, axis_value, num_splits);
for (int out_tensor_idx = 0; out_tensor_idx < num_splits; out_tensor_idx++)
{
auto output_ind = op.getOutputs().at(out_tensor_idx);
auto output = _tensor_registry->getITensor(output_ind);
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
+ output->applyShape(new_shape);
assert(output->buffer() != nullptr);
}
}
@@ -889,7 +1099,7 @@ void DynamicShapeInferer::visit(const ir::operation::Squeeze &op)
auto output_ind = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_ind);
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
+ output->applyShape(new_shape);
assert(output->buffer() != nullptr);
}
@@ -920,17 +1130,16 @@ void DynamicShapeInferer::visit(const ir::operation::StridedSlice &op)
const auto rank = input_shape.rank();
auto op_params = shape_inference::buildStridedSliceParams(
- reinterpret_cast<uint32_t *>(starts->buffer()), reinterpret_cast<uint32_t *>(ends->buffer()),
- reinterpret_cast<uint32_t *>(strides->buffer()), begin_mask, end_mask, shrink_axis_mask,
- rank);
+ reinterpret_cast<uint32_t *>(starts->buffer()), reinterpret_cast<uint32_t *>(ends->buffer()),
+ reinterpret_cast<uint32_t *>(strides->buffer()), begin_mask, end_mask, shrink_axis_mask, rank);
auto output_index = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_index);
ir::Shape output_shape =
- onert::shape_inference::inferStridedSliceShape(input_shape, op_params, rank);
+ onert::shape_inference::inferStridedSliceShape(input_shape, op_params, rank);
- dynamicTensorManagerOf(output)->applyShape(output_index, output_shape);
+ output->applyShape(output_shape);
assert(output->buffer() != nullptr);
}
@@ -952,10 +1161,12 @@ void DynamicShapeInferer::visit(const ir::operation::Tile &op)
auto multiplier_buffer = reinterpret_cast<const int32_t *>(multiplier->buffer());
assert(multiplier_buffer);
- auto output_shape = shape_inference::inferTileShape(input_shape, multiplier_buffer);
+ auto mult_shape = multiplier->getShape();
+ auto output_shape = shape_inference::inferTileShape(
+ input_shape, multiplier_buffer, mult_shape.rank() == 0 ? 1 : mult_shape.dim(0));
// set output shape and output buffer
- dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
+ output->applyShape(output_shape);
assert(output->buffer() != nullptr);
}
@@ -967,17 +1178,49 @@ void DynamicShapeInferer::visit(const ir::operation::Transpose &op)
// from op, access the buffer of second input to read new shape
auto input_ind = op.getInputs().at(ir::operation::Transpose::Input::INPUT);
- auto input_tensor = _tensor_registry->getITensor(input_ind);
- auto input_shape = input_tensor->getShape();
+ auto input = _tensor_registry->getITensor(input_ind);
+ auto input_shape = input->getShape();
+
+ /*
+ Here, the state after compilation (static shape inference) could be one of the following:
+
+ input perms output execution-time shape inf required
+ ------------------------------------ --------------------------------
+ case 1) static const static X
+ case 2) static non-const dynamic O
+ case 3) dynamic const dynamic O
+ case 4) dynamic non-const dynamic O
- if (!input_tensor->is_dynamic())
+ So, only when both input1 and output are static, we can skip dynamic shape inference.
+ */
+ if ((!input->is_dynamic()) && (!output->is_dynamic()))
return;
- const auto perm{op.param().perm};
- // set output shape, based on input and params
- ir::Shape new_shape = shape_inference::inferTransposeShape(input_shape, perm);
+ auto perm_ind = op.getInputs().at(ir::operation::Transpose::Input::PERMUTATION);
+ auto perm = _tensor_registry->getITensor(perm_ind);
+
+ ir::Shape new_shape;
+ // TODO Change perm->dimension(0) == 0 to perm->num_elements() == 0
+ if (perm->getShape().dim(0) == 0) // This condition means that perm is (n-1...0)
+ {
+ // Call by (n-1...0)
+ new_shape = shape_inference::inferTransposeShape(input_shape, nullptr, 0);
+ }
+ else
+ {
+ // Check rank
+ if (static_cast<size_t>(input->getShape().rank()) != perm->getShape().num_elements())
+ {
+ throw std::runtime_error("DynamicShapeInferer failed, bad rank size: " +
+ std::to_string(perm->getShape().num_elements()));
+ }
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
+ // set output shape, based on input and params
+ const auto perm_buffer = reinterpret_cast<const int32_t *>(perm->buffer());
+ new_shape =
+ shape_inference::inferTransposeShape(input_shape, perm_buffer, perm->getShape().dim(0));
+ }
+ output->applyShape(new_shape);
assert(output->buffer() != nullptr);
}
@@ -1005,7 +1248,7 @@ void DynamicShapeInferer::visit(const ir::operation::Unpack &op)
auto output_ind = op.getOutputs().at(out_tensor_idx);
auto output = _tensor_registry->getITensor(output_ind);
- dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
+ output->applyShape(new_shape);
assert(output->buffer() != nullptr);
}
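
Throughout the DynamicShapeInferer hunks above, the per-backend lookup dynamicTensorManagerOf(output)->applyShape(output_ind, shape) is replaced by a direct output->applyShape(shape) call, so the tensor itself reallocates its buffer when its shape changes at run time. Below is a minimal sketch of the resulting per-visit pattern, assuming only the tensor methods used in the diff (is_dynamic(), getShape(), applyShape(), buffer()); the free function name is illustrative and not part of the runtime.

    #include <cassert>

    // Illustrative only: the common shape-propagation pattern after this change,
    // shown for an op whose output keeps the input shape.
    void infer_and_apply(onert::backend::ITensor *input, onert::backend::ITensor *output)
    {
      // Skip execution-time inference when both tensors are still static.
      if (!input->is_dynamic() && !output->is_dynamic())
        return;

      onert::ir::Shape new_shape = input->getShape();
      output->applyShape(new_shape); // sets the shape and (re)allocates the buffer if needed
      assert(output->buffer() != nullptr);
    }
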
diff --git a/runtime/onert/core/src/exec/ExecTime.cc b/runtime/onert/core/src/exec/ExecTime.cc
index 6bf2744a9..4b82655b9 100644
--- a/runtime/onert/core/src/exec/ExecTime.cc
+++ b/runtime/onert/core/src/exec/ExecTime.cc
@@ -14,12 +14,10 @@
* limitations under the License.
*/
-#include "exec/ExecTime.h"
+#include "ExecTime.h"
-#include <fstream>
-#include <cassert>
-#include <limits>
#include <algorithm>
+#include <cassert>
namespace onert
{
diff --git a/runtime/onert/core/src/exec/ExecTime.h b/runtime/onert/core/src/exec/ExecTime.h
index 846d0930b..95f460053 100644
--- a/runtime/onert/core/src/exec/ExecTime.h
+++ b/runtime/onert/core/src/exec/ExecTime.h
@@ -34,7 +34,7 @@ class ExecTime
{
public:
explicit ExecTime(const std::vector<const backend::Backend *> &backends)
- : _json(backends, _measurements)
+ : _json(backends, _measurements)
{
}
@@ -94,7 +94,7 @@ public:
/**
* @brief Update metrics file with new data.
*/
- void uploadOperationsExecTime() const { _json.uploadOperationsExecTime(); }
+ void storeOperationsExecTime() const { _json.storeOperationsExecTime(); }
static const int64_t NOT_FOUND = -1;
private:
diff --git a/runtime/onert/core/src/exec/ExecTime.test.cc b/runtime/onert/core/src/exec/ExecTime.test.cc
new file mode 100644
index 000000000..939184e4e
--- /dev/null
+++ b/runtime/onert/core/src/exec/ExecTime.test.cc
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ExecTime.h"
+
+#include "backend/IConfig.h"
+#include "backend/Backend.h"
+
+#include <gtest/gtest.h>
+
+#include <string>
+
+namespace
+{
+using namespace onert;
+using namespace exec;
+using namespace backend;
+
+struct MockConfig : public IConfig
+{
+ std::string id() override { return "b1"; }
+ bool initialize() override { return true; };
+ bool supportPermutation() override { return false; }
+ ir::Layout supportLayout(const ir::IOperation &, ir::Layout) override
+ {
+ return ir::Layout::UNKNOWN;
+ }
+ bool supportDynamicTensor() override { return false; }
+ bool supportFP16() override { return false; }
+};
+
+struct MockBackend : public ::onert::backend::Backend
+{
+ std::shared_ptr<onert::backend::IConfig> config() const override
+ {
+ return std::make_shared<MockConfig>();
+ }
+ std::unique_ptr<onert::backend::BackendContext> newContext(ContextData &&) const override
+ {
+ return nullptr;
+ }
+};
+
+TEST(ExecTime, roundtrip_ok)
+{
+ const auto *b = new MockBackend();
+ std::vector<const Backend *> bs = {b};
+ {
+ ExecTime et(bs);
+ et.updateOperationExecTime(b, "op1", true, 100, 100);
+ et.updateOperationExecTime(b, "op1", true, 200, 200);
+ et.updateOperationExecTime(b, "op1", false, 100, 888);
+ et.storeOperationsExecTime();
+ }
+ {
+ ExecTime et(bs);
+ auto time = et.getOperationExecTime(b, "op1", true, 100);
+ ASSERT_EQ(time, 100);
+ // Check interpolation
+ time = et.getOperationExecTime(b, "op1", true, 150);
+ ASSERT_EQ(time, 150);
+ time = et.getOperationExecTime(b, "op1", false, 100);
+ ASSERT_EQ(time, 888);
+ et.storeOperationsExecTime();
+ }
+ // clean up
+ EXPECT_EQ(remove("exec_time.json"), 0);
+}
+
+TEST(ExecTime, structure)
+{
+
+ const auto *b = new MockBackend();
+ std::vector<const Backend *> bs = {b};
+ {
+ ExecTime et(bs);
+ et.updateOperationExecTime(b, "op1", true, 100, 100);
+ et.updateOperationExecTime(b, "op1", true, 200, 200);
+ et.storeOperationsExecTime();
+ }
+ {
+ ExecTime et(bs);
+ auto time = et.getOperationExecTime(b, "op1", true, 100);
+ ASSERT_EQ(time, 100);
+ // Check interpolation
+ time = et.getOperationExecTime(b, "op1", true, 200);
+ ASSERT_EQ(time, 200);
+ et.storeOperationsExecTime();
+ }
+ // clean up
+ EXPECT_EQ(remove("exec_time.json"), 0);
+}
+} // unnamed namespace
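
The roundtrip test above stores measurements for "op1" at operand sizes 100 and 200 and then expects a query at size 150 to return 150, which implies linear interpolation between the two nearest measured sizes. The following is a standalone sketch of that interpolation, stated as an assumption about the behaviour the test checks rather than as ExecTime's actual implementation.

    #include <cstdint>

    // Hypothetical helper reproducing what ExecTime.roundtrip_ok asserts:
    // exec time is interpolated linearly between two measured operand sizes.
    int64_t interpolate_exec_time(uint32_t s1, int64_t t1, uint32_t s2, int64_t t2, uint32_t size)
    {
      // With (s1, t1) = (100, 100), (s2, t2) = (200, 200) and size = 150,
      // this yields 150, matching ASSERT_EQ(time, 150) above.
      return t1 + (t2 - t1) * static_cast<int64_t>(size - s1) / static_cast<int64_t>(s2 - s1);
    }
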
diff --git a/runtime/onert/core/src/exec/Execution.cc b/runtime/onert/core/src/exec/Execution.cc
index 7feb3ab68..f51bed820 100644
--- a/runtime/onert/core/src/exec/Execution.cc
+++ b/runtime/onert/core/src/exec/Execution.cc
@@ -16,6 +16,8 @@
#include "exec/Execution.h"
+#include "train/TrainableExecutors.h"
+
#include "util/logging.h"
namespace onert
@@ -23,33 +25,30 @@ namespace onert
namespace exec
{
-Execution::Execution(const std::shared_ptr<ExecutorMap> &executors) : _executors{executors}
+Execution::Execution(const std::shared_ptr<IExecutors> &executors) : _executors{executors}
{
assert(executors != nullptr);
- assert(executors->at(ir::SubgraphIndex{0}) != nullptr);
- const auto &primary_subg = primary_subgraph();
- _io_desc.inputs.resize(primary_subg.getInputs().size());
- _io_desc.outputs.resize(primary_subg.getOutputs().size());
+ assert(executors->entryExecutor() != nullptr);
+ _io_desc.inputs.resize(_executors->inputSize());
+ _io_desc.outputs.resize(_executors->outputSize());
}
void Execution::changeInputShape(const ir::IOIndex &index, const ir::Shape &new_shape)
{
- // This should be called BEFORE setInput.
- if (_io_desc.inputs.at(index.value()) != 0)
- throw std::runtime_error("Error in calling order");
-
// This will be used later to set input tensor dynamic
// Note that 'compiled' model will not be updated with new_shape
// but new_shape will change model input shape while 'running' the model
_io_desc.dynamic_input_shapes[index] = new_shape;
+
+ VERBOSE(Execution) << "Model input shape will be changed at the start of execute()"
+ << "(index: " << index << ")" << std::endl;
}
// TODO Remove default parameter
void Execution::setInput(const ir::IOIndex &index, const void *buffer, size_t length,
ir::Layout layout)
{
- const auto input_index = primary_subgraph().getInputs().at(index);
- const auto info = primary_subgraph().operands().at(input_index).info();
+ const auto &info = _executors->inputInfo(index);
// TODO handle when (!buffer && length != 0) : setting the input as an optional tensor
@@ -58,10 +57,10 @@ void Execution::setInput(const ir::IOIndex &index, const void *buffer, size_t le
// note: input_shape_sig contains shape passed by nnfw_set_input_tensorinfo()
{
auto input_shape_sig = _io_desc.dynamic_input_shapes.find(index);
- auto size_required = (input_shape_sig != _io_desc.dynamic_input_shapes.end())
- ? input_shape_sig->second.num_elements() *
- onert::ir::sizeOfDataType(info.typeInfo().type())
- : info.total_size();
+ auto size_required =
+ (input_shape_sig != _io_desc.dynamic_input_shapes.end())
+ ? input_shape_sig->second.num_elements() * onert::ir::sizeOfDataType(info.typeInfo().type())
+ : info.total_size();
if (length < size_required)
{
@@ -89,8 +88,7 @@ void Execution::setInput(const ir::IOIndex &index, const ir::TypeInfo &type, con
// TODO Remove default parameter
void Execution::setOutput(const ir::IOIndex &index, void *buffer, size_t length, ir::Layout layout)
{
- const auto output_index = primary_subgraph().getOutputs().at(index);
- const auto info = primary_subgraph().operands().at(output_index).info();
+ const auto &info = _executors->outputInfo(index);
if (length < info.total_size())
{
@@ -104,7 +102,7 @@ void Execution::setOutput(const ir::IOIndex &index, void *buffer, size_t length,
void Execution::setOutput(const ir::IOIndex &index, const ir::TypeInfo &type,
const ir::Shape &shape, void *buffer, size_t length, ir::Layout layout)
{
- auto info = ir::OperandInfo::createStaticInfo(shape, type);
+ const auto &info = ir::OperandInfo::createStaticInfo(shape, type);
if (length < info.total_size())
{
@@ -118,21 +116,21 @@ void Execution::setInputLayout(const ir::IOIndex &index, ir::Layout layout)
{
const auto &input_desc = _io_desc.inputs.at(index.value());
_io_desc.inputs.at(index.value()) =
- std::make_unique<InputDesc>(input_desc->info, input_desc->buffer, input_desc->size, layout);
+ std::make_unique<InputDesc>(input_desc->info, input_desc->buffer, input_desc->size, layout);
}
void Execution::setOutputLayout(const ir::IOIndex &index, ir::Layout layout)
{
const auto &output_desc = _io_desc.outputs.at(index.value());
- _io_desc.outputs.at(index.value()) = std::make_unique<OutputDesc>(
- output_desc->info, output_desc->buffer, output_desc->size, layout);
+ _io_desc.outputs.at(index.value()) =
+ std::make_unique<OutputDesc>(output_desc->info, output_desc->buffer, output_desc->size, layout);
}
void Execution::execute()
{
VERBOSE(Execution) << "Start execution" << std::endl;
- primary_executor()->execute(_io_desc);
+ _executors->execute(_io_desc);
finished = true;
VERBOSE(Execution) << "Execution finished" << std::endl;
@@ -155,13 +153,41 @@ void Execution::waitFinish()
bool Execution::isFinished(void) const { return finished; }
+#ifdef ONERT_TRAIN
+void Execution::train(uint32_t training_step)
+{
+ auto execs = dynamic_cast<exec::train::TrainableExecutors *>(_executors.get());
+ if (!execs)
+ {
+ throw std::runtime_error{"Supported only TrainableExecutors"};
+ }
+
+ VERBOSE(Execution) << "Start training" << std::endl;
+
+ execs->train(_io_desc, training_step);
+ finished = true;
+
+ VERBOSE(Execution) << "training finished" << std::endl;
+}
+
+float Execution::getLoss(const ir::IOIndex &ind)
+{
+ auto execs = dynamic_cast<exec::train::TrainableExecutors *>(_executors.get());
+ if (!execs)
+ {
+ throw std::runtime_error{"Supported only TrainableExecutors"};
+ }
+
+ return execs->getLoss(ind);
+}
+#endif // ONERT_TRAIN
+
ir::Shape Execution::getInputShape(ir::IOIndex ind) const
{
auto itr = _io_desc.dynamic_input_shapes.find(ind);
if (itr == _io_desc.dynamic_input_shapes.end())
{
- auto operand_idx = primary_subgraph().getInputs().at(ind.value());
- return primary_subgraph().operands().at(operand_idx).shape();
+ return _executors->inputInfo(ind).shape();
}
else
{
@@ -169,15 +195,32 @@ ir::Shape Execution::getInputShape(ir::IOIndex ind) const
}
}
+// NNAPI returns failure if ANeuralNetworksExecution_getOutputOperandRank or
+// ANeuralNetworksExecution_getOutputOperandDimensions is called before execution.
+// On the other hand, the NNFW API returns the static shape inference result if
+// nnfw_output_tensorinfo is called before execution.
+// To handle both cases, this method returns the static shape inference result, and the failure
+// will be handled on the NNAPI frontend.
ir::Shape Execution::getOutputShape(ir::IOIndex ind) const
{
if (!isFinished())
- throw std::runtime_error("Cannot get output shape before execution is finished");
+ return _executors->outputInfo(ind).shape();
const auto &output_desc = _io_desc.outputs.at(ind.value());
return output_desc->info.shape();
}
+size_t Execution::getInputTotalSize(ir::IOIndex ind) const
+{
+ // TODO Support dynamic shape
+ return _executors->inputInfo(ind).total_size();
+}
+
+size_t Execution::getOutputTotalSize(ir::IOIndex ind) const
+{
+ return _executors->outputInfo(ind).total_size();
+}
+
} // namespace exec
} // namespace onert
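
Taken together, the Execution.cc hunks make the front-end API more permissive: the ordering check between changeInputShape() and setInput() is gone, I/O metadata comes from IExecutors::inputInfo()/outputInfo() instead of the primary subgraph, and getOutputShape() now falls back to the static shape inference result before execution. A hedged usage sketch against that interface follows; the executor handle, shapes and buffer sizes are made up for illustration.

    #include <memory>
    #include <vector>
    #include "exec/Execution.h"

    // Illustrative only: driving the updated Execution interface.
    void run_once(std::shared_ptr<onert::exec::IExecutors> executors)
    {
      onert::exec::Execution execution{executors};

      // May now be called before or after setInput(); the new shape is applied
      // at the start of execute().
      execution.changeInputShape(onert::ir::IOIndex{0}, onert::ir::Shape{1, 2, 2, 1});

      std::vector<float> input(4, 1.0f), output(4, 0.0f);
      execution.setInput(onert::ir::IOIndex{0}, input.data(), input.size() * sizeof(float));
      execution.setOutput(onert::ir::IOIndex{0}, output.data(), output.size() * sizeof(float));

      // Before execute() this returns the static shape inference result;
      // after execute() it reflects the actual output shape.
      auto static_shape = execution.getOutputShape(onert::ir::IOIndex{0});
      execution.execute();
      auto runtime_shape = execution.getOutputShape(onert::ir::IOIndex{0});
      (void)static_shape;
      (void)runtime_shape;
    }
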
diff --git a/runtime/onert/core/src/exec/Execution.test.cc b/runtime/onert/core/src/exec/Execution.test.cc
new file mode 100644
index 000000000..fefe8a332
--- /dev/null
+++ b/runtime/onert/core/src/exec/Execution.test.cc
@@ -0,0 +1,635 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "exec/Execution.h"
+
+#include "compiler/Compiler.h"
+#include "compiler/CompilerFactory.h"
+#include "ir/Graph.h"
+#include "ir/operation/BinaryArithmetic.h"
+#include "util/TracingCtx.h"
+
+#include <gtest/gtest.h>
+#include <thread>
+
+namespace
+{
+
+using namespace onert::ir;
+
+class CompiledMockUpModel
+{
+public:
+ CompiledMockUpModel()
+ {
+ // Model: two elementwise add operations
+ // model input: lhs, rhs1
+ // model output: second add result (result2)
+ // constant: rhs2
+ // result1 <= (lhs + rhs1)
+ // result2 <= (result1 + rhs2)
+ // lhs, rhs1, rhs2, result1, result2 shape: {1, 2, 2, 1}
+ // activation: none (constant)
+ graph = std::make_shared<Graph>();
+ // 1st add operands (result1 <= lhs + rhs1)
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ static float rhs2_data[4] = {3, 1, -1, 5};
+ auto operand_lhs = graph->addOperand(shape, type);
+ auto operand_rhs1 = graph->addOperand(shape, type);
+ auto operand_result1 = graph->addOperand(shape, type);
+ auto operand_rhs2 = graph->addOperand(shape, type);
+ auto operand_result2 = graph->addOperand(shape, type);
+ graph->operands()
+ .at(operand_rhs2)
+ .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
+ // 2nd add operations (result2 <= result1 + rhs2)
+ operation::BinaryArithmetic::Param param1;
+ param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+ param1.activation = Activation::NONE;
+ auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
+ auto output_set1 = OperandIndexSequence{operand_result1};
+ graph->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
+ operation::BinaryArithmetic::Param param2;
+ param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+ param2.activation = Activation::NONE;
+ auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
+ auto output_set2 = OperandIndexSequence{operand_result2};
+ graph->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
+ // Identify model inputs and outputs
+ graph->addInput(operand_lhs);
+ graph->addInput(operand_rhs1);
+ graph->addOutput(operand_result2);
+ graph->verify();
+
+ // Compile
+ auto model = std::make_shared<onert::ir::Model>();
+ model->push(onert::ir::SubgraphIndex{0}, graph);
+ coptions = onert::compiler::CompilerOptions::fromGlobalConfig();
+ onert::compiler::Compiler compiler{model, *coptions};
+ artifact = compiler.compile();
+ }
+
+public:
+ std::shared_ptr<Graph> graph;
+ std::unique_ptr<onert::compiler::CompilerOptions> coptions;
+ std::shared_ptr<onert::compiler::CompilerArtifact> artifact;
+};
+
+class CompiledMockUpMultiModel
+{
+public:
+ CompiledMockUpMultiModel()
+ {
+ // Model0: a float elementwise add operation
+ // Model0 input: lhs0, rhs0
+ // Model0 output: add result (result0)
+
+ // Model1: a qasymm8 elementwise add operation
+ // Model1 input: result0, rhs1
+ // Model1 output: add result (result1)
+
+ // Model2: a float elementwise add operation
+ // Model2 input: result0, result1
+ // Model2 output: add result (result2)
+
+ // constant: rhs1
+ // result0 <= (lhs0 + rhs0)
+ // result1 <= (result0 + rhs1)
+ // result2 <= (result0 + result1)
+ // lhs0, rhs0, rhs1, result0, result1, result2 shape: {1, 2, 2, 1}
+ // activation: none (constant)
+
+ // Update edge information
+ edges.pkg_inputs.emplace_back(ModelIndex{0}, SubgraphIndex{0}, IOIndex{0});
+ edges.pkg_inputs.emplace_back(ModelIndex{0}, SubgraphIndex{0}, IOIndex{1});
+ edges.pkg_outputs.emplace_back(ModelIndex{2}, SubgraphIndex{0}, IOIndex{0});
+ // From
+ const auto result0 = IODesc{ModelIndex{0}, SubgraphIndex{0}, IOIndex{0}};
+ const auto result1 = IODesc{ModelIndex{1}, SubgraphIndex{0}, IOIndex{0}};
+ // To
+ const auto lhs1 = IODesc{ModelIndex{1}, SubgraphIndex{0}, IOIndex{0}};
+ const auto lhs2 = IODesc{ModelIndex{2}, SubgraphIndex{0}, IOIndex{0}};
+ const auto rhs2 = IODesc{ModelIndex{2}, SubgraphIndex{0}, IOIndex{1}};
+ edges.edges.insert({result0, lhs1});
+ edges.edges.insert({result0, lhs2});
+ edges.edges.insert({result1, rhs2});
+
+ for (size_t i = 0; i < 3; ++i)
+ {
+ graphs.emplace_back(std::make_shared<Graph>());
+ }
+ Shape shape{1, 2, 2, 1};
+
+ // Model0's add operands (result0 <= lhs0 + rhs0)
+ DataType types[3] = {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, DataType::FLOAT32};
+ auto operand_lhs0 = graphs[0]->addOperand(shape, TypeInfo{types[0]});
+ auto operand_rhs0 = graphs[0]->addOperand(shape, TypeInfo{types[0]});
+ auto operand_result0 = graphs[0]->addOperand(shape, TypeInfo{types[0]});
+
+ // Model0's add operation
+ operation::BinaryArithmetic::Param param0;
+ param0.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+ param0.activation = Activation::NONE;
+ auto input_set0 = OperandIndexSequence{operand_lhs0, operand_rhs0};
+ auto output_set0 = OperandIndexSequence{operand_result0};
+ graphs[0]->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set0, output_set0, param0));
+
+ // Model0's inputs/outputs
+ graphs[0]->addInput(operand_lhs0);
+ graphs[0]->addInput(operand_rhs0);
+ graphs[0]->addOutput(operand_result0);
+ graphs[0]->verify();
+
+ // Model1's add operands (result1 <= Model0 result + rhs1)
+ // static float rhs1_data[4] = {3, 1, -1, 5};
+ static uint8_t rhs1_data[4] = {131, 129, 127, 133};
+ const float scale = 1;
+ const int32_t zero_point = 128;
+ auto operand_lhs1 = graphs[1]->addOperand(shape, TypeInfo{types[1], scale, zero_point});
+ auto operand_rhs1 = graphs[1]->addOperand(shape, TypeInfo{types[1], scale, zero_point});
+ auto operand_result1 = graphs[1]->addOperand(shape, TypeInfo{types[1], scale, zero_point});
+ graphs[1]
+ ->operands()
+ .at(operand_rhs1)
+ .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs1_data), 4));
+
+ // Model1's add operation
+ operation::BinaryArithmetic::Param param1;
+ param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+ param1.activation = Activation::NONE;
+ auto input_set1 = OperandIndexSequence{operand_lhs1, operand_rhs1};
+ auto output_set1 = OperandIndexSequence{operand_result1};
+ graphs[1]->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
+
+ // Model1's inputs/outputs
+ graphs[1]->addInput(operand_lhs1);
+ graphs[1]->addOutput(operand_result1);
+ graphs[1]->verify();
+
+ // Model2's add operands (result2 <= Model0 result + Model1 result)
+ auto operand_lhs2 = graphs[2]->addOperand(shape, TypeInfo{types[2]});
+ auto operand_rhs2 = graphs[2]->addOperand(shape, TypeInfo{types[2]});
+ auto operand_result2 = graphs[2]->addOperand(shape, TypeInfo{types[2]});
+
+ // Model2's add operation
+ operation::BinaryArithmetic::Param param2;
+ param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+ param2.activation = Activation::NONE;
+ auto input_set2 = OperandIndexSequence{operand_lhs2, operand_rhs2};
+ auto output_set2 = OperandIndexSequence{operand_result2};
+ graphs[2]->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
+
+ // Model2's inputs/outputs
+ graphs[2]->addInput(operand_lhs2);
+ graphs[2]->addInput(operand_rhs2);
+ graphs[2]->addOutput(operand_result2);
+ graphs[2]->verify();
+
+ // Compile
+ compile();
+ }
+
+public:
+ void compile()
+ {
+ auto nnpkg = std::make_shared<onert::ir::NNPkg>();
+ coptions.clear();
+ for (uint16_t i = 0; i < graphs.size(); ++i)
+ {
+ coptions.emplace_back(onert::compiler::CompilerOptions::fromGlobalConfig());
+
+ auto model = std::make_shared<onert::ir::Model>();
+ model->push(SubgraphIndex{0}, graphs[i]);
+
+ nnpkg->push(onert::ir::ModelIndex{i}, std::move(model));
+ }
+ for (const auto &pkg_input : edges.pkg_inputs)
+ {
+ nnpkg->addInput(pkg_input);
+ }
+ for (const auto &pkg_output : edges.pkg_outputs)
+ {
+ nnpkg->addOutput(pkg_output);
+ }
+ for (const auto &edge : edges.edges)
+ {
+ nnpkg->addEdge(edge.from, edge.to);
+ }
+ auto compiler = onert::compiler::CompilerFactory::get().create(nnpkg, coptions);
+ nnpkg.reset();
+ artifact = compiler->compile();
+ }
+
+public:
+ std::vector<std::shared_ptr<Graph>> graphs;
+ std::vector<std::unique_ptr<onert::compiler::CompilerOptions>> coptions;
+ std::shared_ptr<onert::compiler::CompilerArtifact> artifact;
+ ModelEdges edges;
+};
+
+TEST(ExecInstance, simple)
+{
+ auto mockup = CompiledMockUpModel();
+ auto graph = mockup.graph;
+ auto executors = mockup.artifact->_executors;
+
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output = IOIndex{0};
+
+ const float input1_buffer[4] = {1, 0, -1, -2};
+ const float input2_buffer[4] = {1, -3, 2, -4};
+ float output_buffer[4] = {};
+ const float output_expected[4] = {5, -2, 0, -1};
+
+ onert::exec::Execution execution{executors};
+
+ execution.setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16);
+ execution.setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16);
+ execution.setOutput(output, reinterpret_cast<void *>(output_buffer), 16);
+ execution.execute();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(output_buffer[i], output_expected[i]);
+ }
+}
+
+TEST(ExecInstance, twoCompile)
+{
+ auto mockup = CompiledMockUpModel();
+ auto graph = mockup.graph;
+ auto executors1 = mockup.artifact->_executors;
+ onert::exec::Execution execution1{executors1};
+
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output = IOIndex{0};
+
+ const float exe1_input1_buffer[4] = {1, 0, -1, -2};
+ const float exe1_input2_buffer[4] = {1, -3, 2, -4};
+ float exe1_output_buffer[4] = {};
+ const float exe1_output_expected[4] = {5, -2, 0, -1};
+
+ execution1.setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16);
+ execution1.setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16);
+ execution1.setOutput(output, reinterpret_cast<void *>(exe1_output_buffer), 16);
+
+ // Make new executor: compile again
+ auto model = std::make_shared<onert::ir::Model>();
+ model->push(onert::ir::SubgraphIndex{0}, graph);
+ auto coptions = onert::compiler::CompilerOptions::fromGlobalConfig();
+ onert::compiler::Compiler compiler{model, *coptions};
+ std::shared_ptr<onert::compiler::CompilerArtifact> artifact = compiler.compile();
+ onert::exec::Execution execution2{artifact->_executors};
+
+ const float exe2_input1_buffer[4] = {2, 1, -2, 0};
+ const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
+ float exe2_output_buffer[4] = {};
+ const float exe2_output_expected[4] = {2, 5, -2, 7};
+
+ execution2.setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16);
+ execution2.setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16);
+ execution2.setOutput(output, reinterpret_cast<void *>(exe2_output_buffer), 16);
+
+ execution1.execute();
+ execution2.execute();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
+ EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
+ }
+}
+
+// Support two initialized execution instances, then ordered execution
+TEST(ExecInstance, twoExecution)
+{
+ auto mockup = CompiledMockUpModel();
+ auto executors = mockup.artifact->_executors;
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output1 = IOIndex{0};
+
+ const float exe1_input1_buffer[4] = {1, 0, -1, -2};
+ const float exe1_input2_buffer[4] = {1, -3, 2, -4};
+ float exe1_output_buffer[4] = {};
+ const float exe1_output_expected[4] = {5, -2, 0, -1};
+ const float exe2_output_expected[4] = {2, 5, -2, 7};
+
+ onert::exec::Execution execution1{executors};
+ execution1.setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16);
+ execution1.setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16);
+ execution1.setOutput(output1, reinterpret_cast<void *>(exe1_output_buffer), 16);
+
+ const float exe2_input1_buffer[4] = {2, 1, -2, 0};
+ const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
+ float exe2_output_buffer[4] = {};
+
+ // Make new execution
+ onert::exec::Execution execution2{executors};
+ execution2.setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16);
+ execution2.setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16);
+ execution2.setOutput(output1, reinterpret_cast<void *>(exe2_output_buffer), 16);
+
+ execution1.execute();
+ execution2.execute();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
+ EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
+ }
+}
+
+class Inference
+{
+public:
+ Inference(const float (&input1)[4], const float (&input2)[4], float (&output)[4],
+ std::shared_ptr<onert::exec::IExecutors> &executors)
+ : _input1{input1}, _input2{input2}, _output{output}, _executors{executors}
+ {
+ // DO NOTHING
+ }
+
+ void inference(void)
+ {
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output1 = IOIndex{0};
+
+ onert::exec::Execution execution{_executors};
+ execution.setInput(input1, reinterpret_cast<const void *>(_input1), 16);
+ execution.setInput(input2, reinterpret_cast<const void *>(_input2), 16);
+ execution.setOutput(output1, reinterpret_cast<void *>(_output), 16);
+
+ execution.execute();
+ }
+
+private:
+ const float (&_input1)[4];
+ const float (&_input2)[4];
+ float (&_output)[4];
+ std::shared_ptr<onert::exec::IExecutors> &_executors;
+};
+
+// Support multi-thread execution
+TEST(ExecInstance, twoThreads)
+{
+ auto mockup = CompiledMockUpModel();
+ auto executors = mockup.artifact->_executors;
+
+ const float exe1_input1_buffer[4] = {1, 0, -1, -2};
+ const float exe1_input2_buffer[4] = {1, -3, 2, -4};
+ float exe1_output_buffer[4] = {};
+ const float exe1_output_expected[4] = {5, -2, 0, -1};
+
+ Inference execution1{exe1_input1_buffer, exe1_input2_buffer, exe1_output_buffer, executors};
+
+ const float exe2_input1_buffer[4] = {2, 1, -2, 0};
+ const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
+ float exe2_output_buffer[4] = {};
+ const float exe2_output_expected[4] = {2, 5, -2, 7};
+
+ Inference execution2{exe2_input1_buffer, exe2_input2_buffer, exe2_output_buffer, executors};
+
+ std::thread t1{&Inference::inference, &execution1};
+ std::thread t2{&Inference::inference, &execution2};
+
+ t1.join();
+ t2.join();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
+ EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
+ }
+}
+
+// Support asynchronous execution
+TEST(ExecInstance, async)
+{
+ auto mockup = CompiledMockUpModel();
+ auto graph = mockup.graph;
+ auto executors = mockup.artifact->_executors;
+
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output = IOIndex{0};
+
+ const float input1_buffer[4] = {1, 0, -1, -2};
+ const float input2_buffer[4] = {1, -3, 2, -4};
+ float output_buffer[4] = {};
+ const float output_expected[4] = {5, -2, 0, -1};
+
+ onert::exec::Execution execution{executors};
+
+ execution.setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16);
+ execution.setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16);
+ execution.setOutput(output, reinterpret_cast<void *>(output_buffer), 16);
+ execution.startExecute();
+ execution.waitFinish();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(output_buffer[i], output_expected[i]);
+ }
+}
+
+TEST(ExecInstance, multi_model_simple)
+{
+ auto mockup = CompiledMockUpMultiModel();
+ auto executors = mockup.artifact->_executors;
+
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output = IOIndex{0};
+
+ const float input1_buffer[4] = {1, 0, -1, -2};
+ const float input2_buffer[4] = {1, -3, 2, -4};
+ float output_buffer[4] = {};
+ const float output_expected[4] = {7, -5, 1, -7};
+
+ onert::exec::Execution execution{executors};
+
+ execution.setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16);
+ execution.setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16);
+ execution.setOutput(output, reinterpret_cast<void *>(output_buffer), 16);
+ execution.execute();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(output_buffer[i], output_expected[i]);
+ }
+}
+
+TEST(ExecInstance, multi_model_twoCompile)
+{
+ auto mockup = CompiledMockUpMultiModel();
+ auto executors1 = mockup.artifact->_executors;
+ onert::exec::Execution execution1{executors1};
+
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output = IOIndex{0};
+
+ const float exe1_input1_buffer[4] = {1, 0, -1, -2};
+ const float exe1_input2_buffer[4] = {1, -3, 2, -4};
+ float exe1_output_buffer[4] = {};
+ const float exe1_output_expected[4] = {7, -5, 1, -7};
+
+ execution1.setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16);
+ execution1.setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16);
+ execution1.setOutput(output, reinterpret_cast<void *>(exe1_output_buffer), 16);
+
+ // Make new executor: compile again
+ mockup.compile();
+ onert::exec::Execution execution2{mockup.artifact->_executors};
+
+ const float exe2_input1_buffer[4] = {2, 1, -2, 0};
+ const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
+ float exe2_output_buffer[4] = {};
+ const float exe2_output_expected[4] = {1, 9, -3, 9};
+
+ execution2.setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16);
+ execution2.setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16);
+ execution2.setOutput(output, reinterpret_cast<void *>(exe2_output_buffer), 16);
+
+ execution1.execute();
+ execution2.execute();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
+ EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
+ }
+}
+
+// Support two initialized execution instances, then ordered execution
+TEST(ExecInstance, multi_model_twoExecution)
+{
+ auto mockup = CompiledMockUpMultiModel();
+ auto executors = mockup.artifact->_executors;
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output1 = IOIndex{0};
+
+ const float exe1_input1_buffer[4] = {1, 0, -1, -2};
+ const float exe1_input2_buffer[4] = {1, -3, 2, -4};
+ float exe1_output_buffer[4] = {};
+ const float exe1_output_expected[4] = {7, -5, 1, -7};
+ const float exe2_output_expected[4] = {1, 9, -3, 9};
+
+ onert::exec::Execution execution1{executors};
+ execution1.setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16);
+ execution1.setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16);
+ execution1.setOutput(output1, reinterpret_cast<void *>(exe1_output_buffer), 16);
+
+ const float exe2_input1_buffer[4] = {2, 1, -2, 0};
+ const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
+ float exe2_output_buffer[4] = {};
+
+ // Make new execution
+ onert::exec::Execution execution2{executors};
+ execution2.setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16);
+ execution2.setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16);
+ execution2.setOutput(output1, reinterpret_cast<void *>(exe2_output_buffer), 16);
+
+ execution1.execute();
+ execution1.execute();
+ execution2.execute();
+ execution2.execute();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
+ EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
+ }
+}
+
+// Multi-model is not thread-safe yet
+
+// Support asynchronous execution
+TEST(ExecInstance, multi_model_async)
+{
+ auto mockup = CompiledMockUpMultiModel();
+ auto executors = mockup.artifact->_executors;
+
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output = IOIndex{0};
+
+ const float input1_buffer[4] = {1, 0, -1, -2};
+ const float input2_buffer[4] = {1, -3, 2, -4};
+ float output_buffer[4] = {};
+ const float output_expected[4] = {7, -5, 1, -7};
+
+ onert::exec::Execution execution{executors};
+
+ execution.setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16);
+ execution.setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16);
+ execution.setOutput(output, reinterpret_cast<void *>(output_buffer), 16);
+ execution.startExecute();
+ execution.waitFinish();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(output_buffer[i], output_expected[i]);
+ }
+}
+
+TEST(ExecInstance, multi_model_dequant_input_quant_output)
+{
+ auto mockup = CompiledMockUpMultiModel();
+ auto executors = mockup.artifact->_executors;
+
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output = IOIndex{0};
+
+ const uint8_t input1_buffer[4] = {138, 128, 118, 108}; // {1, 0, -1, -2}
+ const uint8_t input2_buffer[4] = {138, 98, 148, 88}; // {1, -3, 2, -4}
+ uint8_t output_buffer[4] = {};
+ const uint8_t output_expected[4] = {198, 78, 138, 58}; // {7, -5, 1, -7}
+ float scale = 0.1;
+ int32_t zero_point = 128;
+
+ onert::exec::Execution execution{executors};
+
+ onert::ir::TypeInfo type_info{onert::ir::DataType::QUANT_UINT8_ASYMM, scale, zero_point};
+ execution.setInput(input1, type_info, execution.getInputShape(input1),
+ reinterpret_cast<const void *>(input1_buffer), 4, onert::ir::Layout::NHWC);
+ execution.setInput(input2, type_info, execution.getInputShape(input2),
+ reinterpret_cast<const void *>(input2_buffer), 4, onert::ir::Layout::NHWC);
+ execution.setOutput(output, type_info, execution.getOutputShape(output),
+ reinterpret_cast<void *>(output_buffer), 4, onert::ir::Layout::NHWC);
+ execution.execute();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(output_buffer[i], output_expected[i]);
+ }
+}
+
+// TODO Add an unittest multi_model_quant_input_dequant_output
+
+} // namespace
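
The comments in multi_model_dequant_input_quant_output above pair each uint8 value with a float value through the usual asymmetric quantization mapping real = (quantized - zero_point) * scale, with scale 0.1 and zero_point 128 as set in the test. The snippet below is a small standalone check of those pairs; it is plain arithmetic, not runtime code.

    #include <cassert>
    #include <cmath>
    #include <cstdint>

    // Affine (asymmetric) dequantization used by the test's comments.
    float dequantize(uint8_t q, float scale, int32_t zero_point)
    {
      return (static_cast<int32_t>(q) - zero_point) * scale;
    }

    int main()
    {
      const float scale = 0.1f;
      const int32_t zp = 128;
      // Expected output {198, 78, 138, 58} corresponds to {7, -5, 1, -7}.
      assert(std::fabs(dequantize(198, scale, zp) - 7.f) < 1e-5f);
      assert(std::fabs(dequantize(78, scale, zp) - (-5.f)) < 1e-5f);
      assert(std::fabs(dequantize(138, scale, zp) - 1.f) < 1e-5f);
      assert(std::fabs(dequantize(58, scale, zp) - (-7.f)) < 1e-5f);
      return 0;
    }
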
diff --git a/runtime/onert/core/src/exec/ExecutionObservee.cc b/runtime/onert/core/src/exec/ExecutionObservee.cc
index ddb1fb6a0..66610f0e0 100644
--- a/runtime/onert/core/src/exec/ExecutionObservee.cc
+++ b/runtime/onert/core/src/exec/ExecutionObservee.cc
@@ -26,37 +26,37 @@ void ExecutionObservee::add(std::unique_ptr<IExecutionObserver> observer)
_observers.emplace_back(std::move(observer));
}
-void ExecutionObservee::notifyModelBegin(IExecutor *executor)
+void ExecutionObservee::notifySubgraphBegin(ir::SubgraphIndex ind)
{
- for (auto &o : _observers)
+ for (auto &&o : _observers)
{
- o->handleBegin(executor);
+ o->handleSubgraphBegin(ind);
}
}
-void ExecutionObservee::notifyModelEnd(IExecutor *executor)
+void ExecutionObservee::notifySubgraphEnd(ir::SubgraphIndex ind)
{
- for (auto &o : _observers)
+ for (auto &&o : _observers)
{
- o->handleEnd(executor);
+ o->handleSubgraphEnd(ind);
}
}
-void ExecutionObservee::notifyJobBegin(IExecutor *executor, const ir::OpSequence *op_seq,
- const backend::Backend *backend)
+void ExecutionObservee::notifyJobBegin(IExecutor *executor, ir::SubgraphIndex subg_ind,
+ ir::OperationIndex op_ind, const backend::Backend *backend)
{
- for (auto &o : _observers)
+ for (auto &&o : _observers)
{
- o->handleBegin(executor, op_seq, backend);
+ o->handleJobBegin(executor, subg_ind, op_ind, backend);
}
}
-void ExecutionObservee::notifyJobEnd(IExecutor *executor, const ir::OpSequence *op_seq,
- const backend::Backend *backend)
+void ExecutionObservee::notifyJobEnd(IExecutor *executor, ir::SubgraphIndex subg_ind,
+ ir::OperationIndex op_ind, const backend::Backend *backend)
{
- for (auto &o : _observers)
+ for (auto &&o : _observers)
{
- o->handleEnd(executor, op_seq, backend);
+ o->handleJobEnd(executor, subg_ind, op_ind, backend);
}
}
diff --git a/runtime/onert/core/src/exec/ExecutionObservee.h b/runtime/onert/core/src/exec/ExecutionObservee.h
index 49d409a3a..3ee1754c9 100644
--- a/runtime/onert/core/src/exec/ExecutionObservee.h
+++ b/runtime/onert/core/src/exec/ExecutionObservee.h
@@ -17,9 +17,11 @@
#ifndef __ONERT_EXEC_EXECUTION_OBSERVEE_H__
#define __ONERT_EXEC_EXECUTION_OBSERVEE_H__
-#include <list>
+#include "ExecutionObservers.h"
+
+#include "ir/Index.h"
-#include "exec/ExecutionObservers.h"
+#include <list>
namespace onert
{
@@ -39,11 +41,11 @@ public:
* @param observer Observer to be added
*/
void add(std::unique_ptr<IExecutionObserver> observer);
- void notifyModelBegin(IExecutor *executor);
- void notifyModelEnd(IExecutor *executor);
- void notifyJobBegin(IExecutor *executor, const ir::OpSequence *op_seq,
+ void notifySubgraphBegin(ir::SubgraphIndex ind);
+ void notifySubgraphEnd(ir::SubgraphIndex ind);
+ void notifyJobBegin(IExecutor *executor, ir::SubgraphIndex subg_ind, ir::OperationIndex op_ind,
const backend::Backend *backend);
- void notifyJobEnd(IExecutor *executor, const ir::OpSequence *op_seq,
+ void notifyJobEnd(IExecutor *executor, ir::SubgraphIndex subg_ind, ir::OperationIndex op_ind,
const backend::Backend *backend);
private:
diff --git a/runtime/onert/core/src/exec/ExecutionObservers.cc b/runtime/onert/core/src/exec/ExecutionObservers.cc
index 060f874de..5245518a0 100644
--- a/runtime/onert/core/src/exec/ExecutionObservers.cc
+++ b/runtime/onert/core/src/exec/ExecutionObservers.cc
@@ -14,14 +14,58 @@
* limitations under the License.
*/
-#include "exec/ExecutionObservers.h"
+#include "ExecutionObservers.h"
-#include <string>
+#include "../util/EventWriter.h"
#include "util/logging.h"
-#include "exec/IExecutor.h"
-#include "misc/polymorphic_downcast.h"
-#include "ir/OpSequence.h"
+
+#include <misc/polymorphic_downcast.h>
+
+#include <string>
+#include <sstream>
+
+namespace
+{
+
+void setUserData(const onert::ir::Graph &g, const onert::ir::IOperation *op,
+ decltype(EventCollector::Event::userData) &data)
+{
+ // From a tensor of shape [a, b, c], this will return a string "shape(a b c)".
+ // A string like "[1, 2, 3]" would look better, but it would be treated as a list in JSON,
+ // so text search (e.g., Ctrl-F in Chrome Tracing) could be difficult
+ auto build_shape_str = [&](onert::ir::OperandIndex operand_idx) {
+ std::string shape_str;
+ auto &shape = g.operands().at(operand_idx).info().shape();
+ for (int i = 0; i < shape.rank(); i++)
+ {
+ if (i == 0)
+ shape_str = "shape(" + std::to_string(shape.dim(i));
+ else
+ shape_str += " " + std::to_string(shape.dim(i));
+ }
+ shape_str += ")";
+
+ return shape_str;
+ };
+
+ auto &inputs = op->getInputs();
+ auto size = inputs.size();
+ for (size_t i = 0; i < size; i++)
+ {
+ auto operand_idx = inputs.at(i);
+ if (operand_idx.undefined())
+ continue;
+
+ std::string key("input_shape_" + std::to_string(i));
+ std::string value = build_shape_str(operand_idx);
+ data.emplace_back(std::make_pair(key, value));
+ }
+
+ // add other userData as needed
+}
+
+} // namespace
namespace onert
{
@@ -29,8 +73,8 @@ namespace onert
namespace exec
{
-void ProfileObserver::handleBegin(onert::exec::IExecutor *, const ir::OpSequence *,
- const onert::backend::Backend *backend)
+void ProfileObserver::handleJobBegin(onert::exec::IExecutor *, ir::SubgraphIndex,
+ ir::OperationIndex, const onert::backend::Backend *backend)
{
_timer = backend->config()->timer();
if (_timer == nullptr)
@@ -38,14 +82,14 @@ void ProfileObserver::handleBegin(onert::exec::IExecutor *, const ir::OpSequence
_timer->handleBegin();
}
-void ProfileObserver::handleEnd(IExecutor *exec, const ir::OpSequence *op_seq,
- const backend::Backend *backend)
+void ProfileObserver::handleJobEnd(IExecutor *exec, ir::SubgraphIndex,
+ const ir::OperationIndex op_ind, const backend::Backend *backend)
{
_timer->handleEnd();
const auto timer_res = _timer->getTime();
- // NOTE This assumes there is just one operation in a op_seq
- const auto &node = _graph.operations().at(op_seq->operations().at(0));
+ // NOTE This assumes there is just one operation in an op
+ const auto &node = _graph.operations().at(op_ind);
auto node_name = node.name();
VERBOSE(ProfileInfo) << "Time for " << node_name << " : " << timer_res << std::endl;
@@ -54,7 +98,7 @@ void ProfileObserver::handleEnd(IExecutor *exec, const ir::OpSequence *op_seq,
ir::DataType::QUANT_UINT8_ASYMM;
uint32_t size = 0;
- for (const auto &ind : node.getInputs() + node.getOutputs())
+ for (const auto &ind : (node.getInputs() + node.getOutputs()) | ir::Remove::UNDEFINED)
{
size += exec->graph().operands().at(ind).info().total_size();
}
@@ -69,64 +113,59 @@ void ProfileObserver::handleEnd(IExecutor *exec, const ir::OpSequence *op_seq,
}
};
-ChromeTracingObserver::ChromeTracingObserver(const std::string &filepath, const ir::Graph &graph)
- : _ofs{filepath, std::ofstream::out}, _recorder{}, _collector{&_recorder}, _graph{graph}
+TracingObserver::TracingObserver(const std::string &filepath, const ir::Graph &graph,
+ const util::TracingCtx *tracing_ctx)
+ : _recorder{std::make_unique<EventRecorder>()}, _collector{_recorder.get()}, _graph{graph},
+ _tracing_ctx{tracing_ctx}
{
+ _event_writer = EventWriter::get(filepath);
+ _event_writer->startToUse();
}
-ChromeTracingObserver::~ChromeTracingObserver()
+TracingObserver::~TracingObserver()
{
try
{
- _recorder.writeToFile(_ofs);
+ _event_writer->readyToFlush(std::move(_recorder));
}
catch (const std::exception &e)
{
- std::cerr << "E: Fail to record event in ChromeTracingObserver: " << e.what() << std::endl;
+ std::cerr << "E: Fail to record event in TracingObserver: " << e.what() << std::endl;
}
}
-void ChromeTracingObserver::handleBegin(IExecutor *)
+void TracingObserver::handleSubgraphBegin(ir::SubgraphIndex subg_ind)
{
- _collector.onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "runtime", "Graph"});
+ _collector.onEvent(
+ EventCollector::SubgEvent{_tracing_ctx, EventCollector::Edge::BEGIN, subg_ind.value()});
}
-void ChromeTracingObserver::handleBegin(IExecutor *, const ir::OpSequence *op_seq,
- const backend::Backend *backend)
+void TracingObserver::handleJobBegin(IExecutor *, ir::SubgraphIndex subg_ind,
+ ir::OperationIndex op_ind, const backend::Backend *backend)
{
std::string backend_id = backend->config()->id();
- _collector.onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, backend_id,
- opSequenceTag(op_seq, _graph.operations())});
+ const auto &op = _graph.operations().at(op_ind);
+ auto ev = EventCollector::OpSeqEvent{_tracing_ctx, EventCollector::Edge::BEGIN,
+ subg_ind.value(), backend_id,
+ op_ind.value(), op.name()};
+ // add shape of inputs
+ setUserData(_graph, &op, ev.userData);
+ _collector.onEvent(ev);
}
-void ChromeTracingObserver::handleEnd(IExecutor *, const ir::OpSequence *op_seq,
- const backend::Backend *backend)
+void TracingObserver::handleJobEnd(IExecutor *, ir::SubgraphIndex subg_ind,
+ ir::OperationIndex op_ind, const backend::Backend *backend)
{
std::string backend_id = backend->config()->id();
- _collector.onEvent(EventCollector::Event{EventCollector::Edge::END, backend_id,
- opSequenceTag(op_seq, _graph.operations())});
+ _collector.onEvent(EventCollector::OpSeqEvent{_tracing_ctx, EventCollector::Edge::END,
+ subg_ind.value(), backend_id, op_ind.value(),
+ _graph.operations().at(op_ind).name()});
}
-void ChromeTracingObserver::handleEnd(IExecutor *)
+void TracingObserver::handleSubgraphEnd(ir::SubgraphIndex subg_ind)
{
- _collector.onEvent(EventCollector::Event{EventCollector::Edge::END, "runtime", "Graph"});
-}
-
-std::string ChromeTracingObserver::opSequenceTag(const ir::OpSequence *op_seq,
- const ir::Operations &operations)
-{
- if (op_seq->size() == 0)
- return "Empty OpSequence";
-
- const auto &first_op_idx = op_seq->operations().at(0);
- const auto &first_op_node = operations.at(first_op_idx);
- std::string tag = "$" + std::to_string(first_op_idx.value());
- tag += " " + first_op_node.name();
- if (op_seq->size() > 1)
- {
- tag += " (+" + std::to_string(op_seq->size() - 1) + ")";
- }
- return tag;
+ _collector.onEvent(
+ EventCollector::SubgEvent{_tracing_ctx, EventCollector::Edge::END, subg_ind.value()});
}
} // namespace exec
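
As the comment in setUserData() above notes, input shapes are attached to trace events as strings of the form "shape(a b c)" (keyed "input_shape_<i>") instead of a JSON-style list, so they remain easy to text-search in Chrome Tracing. A simplified, self-contained stand-in for that formatting is shown below, for illustration only.

    #include <cstdint>
    #include <string>
    #include <vector>

    // For dims = {1, 2, 2, 1} this returns "shape(1 2 2 1)"; a rank-0 tensor
    // yields "shape()". Simplified equivalent of the helper in setUserData().
    std::string build_shape_str(const std::vector<int32_t> &dims)
    {
      std::string shape_str = "shape(";
      for (size_t i = 0; i < dims.size(); ++i)
      {
        if (i != 0)
          shape_str += " ";
        shape_str += std::to_string(dims[i]);
      }
      return shape_str + ")";
    }
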
diff --git a/runtime/onert/core/src/exec/ExecutionObservers.h b/runtime/onert/core/src/exec/ExecutionObservers.h
index ac0076ed2..7e93ecf7c 100644
--- a/runtime/onert/core/src/exec/ExecutionObservers.h
+++ b/runtime/onert/core/src/exec/ExecutionObservers.h
@@ -17,13 +17,16 @@
#ifndef __ONERT_EXEC_OBSREVERS_H__
#define __ONERT_EXEC_OBSREVERS_H__
-#include "exec/IFunction.h"
-#include "ir/OpSequence.h"
#include "ExecTime.h"
-#include "util/ITimer.h"
+#include "../util/EventCollector.h"
+#include "../util/EventRecorder.h"
+#include "../util/EventWriter.h"
+
#include "exec/IExecutor.h"
-#include "util/EventCollector.h"
-#include "util/EventRecorder.h"
+#include "ir/Index.h"
+#include "ir/IOperation.h"
+#include "util/ITimer.h"
+#include "util/TracingCtx.h"
namespace onert
{
@@ -33,13 +36,15 @@ class IExecutionObserver
{
public:
/// @brief Invoked just before model (not individual operation) execution begins
- virtual void handleBegin(IExecutor *) { return; }
+ virtual void handleSubgraphBegin(ir::SubgraphIndex) { return; }
- virtual void handleBegin(IExecutor *, const ir::OpSequence *, const backend::Backend *) = 0;
- virtual void handleEnd(IExecutor *, const ir::OpSequence *, const backend::Backend *) = 0;
+ virtual void handleJobBegin(IExecutor *, ir::SubgraphIndex, ir::OperationIndex,
+ const backend::Backend *) = 0;
+ virtual void handleJobEnd(IExecutor *, ir::SubgraphIndex, ir::OperationIndex,
+ const backend::Backend *) = 0;
/// @brief Invoked just after model (not individual operation) execution ends
- virtual void handleEnd(IExecutor *) { return; }
+ virtual void handleSubgraphEnd(ir::SubgraphIndex) { return; }
virtual ~IExecutionObserver() = default;
};
@@ -48,13 +53,15 @@ class ProfileObserver : public IExecutionObserver
{
public:
explicit ProfileObserver(std::shared_ptr<ExecTime> et, const ir::Graph &graph)
- : _et(std::move(et)), _graph(graph)
+ : _et(std::move(et)), _graph(graph)
{
}
- void handleBegin(IExecutor *, const ir::OpSequence *, const backend::Backend *) override;
- void handleEnd(IExecutor *, const ir::OpSequence *, const backend::Backend *) override;
+ void handleJobBegin(IExecutor *, ir::SubgraphIndex, ir::OperationIndex,
+ const backend::Backend *) override;
+ void handleJobEnd(IExecutor *, ir::SubgraphIndex, ir::OperationIndex,
+ const backend::Backend *) override;
- void handleEnd(IExecutor *) override { _et->uploadOperationsExecTime(); }
+ void handleSubgraphEnd(ir::SubgraphIndex) override { _et->storeOperationsExecTime(); }
private:
std::unique_ptr<util::ITimer> _timer;
@@ -62,24 +69,25 @@ private:
const ir::Graph &_graph;
};
-class ChromeTracingObserver : public IExecutionObserver
+class TracingObserver : public IExecutionObserver
{
public:
- ChromeTracingObserver(const std::string &filepath, const ir::Graph &graph);
- ~ChromeTracingObserver();
- void handleBegin(IExecutor *) override;
- void handleBegin(IExecutor *, const ir::OpSequence *, const backend::Backend *) override;
- void handleEnd(IExecutor *, const ir::OpSequence *, const backend::Backend *) override;
- void handleEnd(IExecutor *) override;
-
-private:
- static std::string opSequenceTag(const ir::OpSequence *op_seq, const ir::Operations &operations);
+ TracingObserver(const std::string &filepath, const ir::Graph &graph,
+ const util::TracingCtx *tracing_ctx);
+ ~TracingObserver();
+ void handleSubgraphBegin(ir::SubgraphIndex) override;
+ void handleJobBegin(IExecutor *, ir::SubgraphIndex, ir::OperationIndex,
+ const backend::Backend *) override;
+ void handleJobEnd(IExecutor *, ir::SubgraphIndex, ir::OperationIndex,
+ const backend::Backend *) override;
+ void handleSubgraphEnd(ir::SubgraphIndex) override;
private:
- std::ofstream _ofs;
- EventRecorder _recorder;
+ std::unique_ptr<EventRecorder> _recorder;
EventCollector _collector;
const ir::Graph &_graph;
+ EventWriter *_event_writer;
+ const util::TracingCtx *_tracing_ctx;
};
} // namespace exec
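
The header above replaces the OpSequence-based hooks with subgraph- and operation-index-based ones. Below is a minimal custom observer against the new interface, assuming only the declarations visible in this hunk (IExecutionObserver, handleJobBegin/handleJobEnd/handleSubgraphEnd, Backend::config()->id()); the counting logic itself is illustrative.

    #include "ExecutionObservers.h"

    #include <iostream>
    #include <map>
    #include <string>

    // Counts how many jobs each backend executed within a subgraph run.
    class JobCountObserver : public onert::exec::IExecutionObserver
    {
    public:
      void handleJobBegin(onert::exec::IExecutor *, onert::ir::SubgraphIndex,
                          onert::ir::OperationIndex, const onert::backend::Backend *) override
      {
        // Nothing to do when a job starts; this observer only counts completions.
      }

      void handleJobEnd(onert::exec::IExecutor *, onert::ir::SubgraphIndex, onert::ir::OperationIndex,
                        const onert::backend::Backend *backend) override
      {
        ++_job_count[backend->config()->id()];
      }

      void handleSubgraphEnd(onert::ir::SubgraphIndex) override
      {
        for (const auto &entry : _job_count)
          std::cout << entry.first << " ran " << entry.second << " jobs" << std::endl;
      }

    private:
      std::map<std::string, int> _job_count;
    };

Such an observer would be registered the same way as the built-in ones, e.g. observee.add(std::make_unique<JobCountObserver>()) via ExecutionObservee::add() shown earlier.
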
diff --git a/runtime/onert/core/src/exec/ExecutorBase.cc b/runtime/onert/core/src/exec/ExecutorBase.cc
index f835a9675..0bc088b02 100644
--- a/runtime/onert/core/src/exec/ExecutorBase.cc
+++ b/runtime/onert/core/src/exec/ExecutorBase.cc
@@ -16,10 +16,9 @@
#include "ExecutorBase.h"
-#include "backend/ITensor.h"
-#include "backend/controlflow/UserTensor.h"
-#include "backend/cpu_common/Tensor.h"
-#include "util/logging.h"
+#include "ShapeConverter.h"
+
+#include <misc/polymorphic_downcast.h>
namespace onert
{
@@ -27,68 +26,29 @@ namespace exec
{
ExecutorBase::ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph,
- const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
- const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
+ backend::BackendContexts &&backend_contexts,
const compiler::TensorRegistries &tensor_regs,
- backend::TensorManagerSet &&tensor_mgrs)
- : _lowered_graph{std::move(lowered_graph)}, _graph{_lowered_graph->graph()},
- _input_tensors{input_tensors}, _output_tensors{output_tensors},
- _tensor_mgrs{std::move(tensor_mgrs)}, _mutex()
+ const util::TracingCtx *tracing_ctx)
+ : _lowered_graph{std::move(lowered_graph)},
+ _backend_contexts{std::move(backend_contexts)}, _graph{_lowered_graph->graph()}, _mutex(),
+ _tracing_ctx(tracing_ctx)
{
- // TODO Fix the way of knowing whether it is primary or not
- bool primary_executor = !(_input_tensors.empty() && _output_tensors.empty());
- if (!primary_executor)
- {
- auto build_input_tensor_list = [&](const onert::ir::OperandIndexSequence &ind_seq) {
- std::vector<std::shared_ptr<backend::ITensor>> list;
- for (auto ind : ind_seq)
- {
- std::shared_ptr<backend::ITensor> tensor = tensor_regs.getITensor(ind);
- assert(tensor != nullptr);
- DynAllocInfo dyn_alloc_info{ind};
- _input_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
- list.push_back(tensor);
- }
- return list;
- };
- auto build_output_tensor_list = [&](const onert::ir::OperandIndexSequence &ind_seq) {
- std::vector<std::shared_ptr<backend::ITensor>> list;
- for (auto ind : ind_seq)
- {
- std::shared_ptr<backend::ITensor> tensor = tensor_regs.getITensor(ind);
- assert(tensor != nullptr);
- DynAllocInfo dyn_alloc_info{ind};
- _output_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
- list.push_back(tensor);
- }
- return list;
- };
- _input_tensors = build_input_tensor_list(_graph.getInputs());
- _output_tensors = build_output_tensor_list(_graph.getOutputs());
- }
- else
- {
- assert(input_tensors.size() == _graph.getInputs().size());
- assert(output_tensors.size() == _graph.getOutputs().size());
- for (uint32_t i = 0; i < input_tensors.size(); i++)
+ auto build_tensor_list = [&](const auto &ind_seq, auto &tensors) {
+ assert(tensors.empty());
+ for (auto &&ind : ind_seq)
{
- auto tensor = input_tensors[i];
- auto ind = _graph.getInputs().at(i);
- DynAllocInfo dyn_alloc_info{ind};
- _input_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
+ backend::ITensor *tensor = tensor_regs.getITensor(ind);
+ assert(tensor != nullptr);
+ auto io_tensor = nnfw::misc::polymorphic_downcast<backend::builtin::IOTensor *>(tensor);
+ tensors.push_back(io_tensor);
}
- for (uint32_t i = 0; i < output_tensors.size(); i++)
- {
- auto tensor = output_tensors[i];
- auto ind = _graph.getOutputs().at(i);
- DynAllocInfo dyn_alloc_info{ind};
- _output_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
- }
- }
+ };
+ build_tensor_list(_graph.getInputs(), _input_tensors);
+ build_tensor_list(_graph.getOutputs(), _output_tensors);
}
-void ExecutorBase::execute(const std::vector<std::shared_ptr<backend::ITensor>> &src_tensors,
- const std::shared_ptr<IPermuteFunction> &pre_fn)
+void ExecutorBase::execute(const std::vector<backend::IPortableTensor *> &inputs,
+ const std::vector<backend::IPortableTensor *> &outputs)
{
// For thread-safe, use mutex
// TODO: if all used backends on this executor are thread-safe,
@@ -96,41 +56,43 @@ void ExecutorBase::execute(const std::vector<std::shared_ptr<backend::ITensor>>
// Deadlock occurs when an Executor is called recursively.
std::lock_guard<std::mutex> lock(_mutex);
- assert(src_tensors.size() == _graph.getInputs().size());
- assert(src_tensors.size() == _input_tensors.size());
- for (uint32_t n = 0; n < _graph.getInputs().size(); ++n)
+ assert(inputs.size() == _graph.getInputs().size());
+ assert(inputs.size() == _input_tensors.size());
+ for (uint32_t n = 0; n < inputs.size(); ++n)
{
- // when user changes input shape, the input tensor is dynamic and its memory is not allocated.
- // This code find the info to allocate dynamic tensor, and allocate memory based on the source
- // tensor's shape set by caller.
- const auto src_tensor = src_tensors[n];
+ const auto input = inputs[n];
+ assert(input->buffer() != nullptr);
auto input_tensor = _input_tensors[n];
- // If src_tensor or input_tensor is nullptr, pre_fn does not copy the tensors
- if (src_tensor != nullptr && input_tensor != nullptr)
+ assert(input_tensor != nullptr);
+ if (input != nullptr)
{
- auto dyn_alloc_info = _input_to_dyn_alloc_info.find(_input_tensors[n]);
- const auto orig_input_shape = input_tensor->getShape();
- const auto changed_input_shape =
- convertShape(src_tensor->getShape(), src_tensor->layout(), input_tensor->layout());
+ const auto &orig_input_shape = input_tensor->orig_info().shape();
+ const auto &changed_input_shape =
+ convertShape(input->getShape(), input->layout(), input_tensor->orig_layout());
+ if (input_tensor->get_info().shape() != changed_input_shape)
+ {
+        // TODO Fix this workaround, introduced because CPU-based kernels read `_info` directly
+        // rather than through interface methods in order to avoid virtual function calls.
+ input_tensor->setShapeOfIPortableTensor(changed_input_shape);
+ }
if (orig_input_shape != changed_input_shape)
{
- if (dyn_alloc_info == _input_to_dyn_alloc_info.end())
- {
- // The input_tensor is a dynamic tensor of backend that doesn't support dynamic tensor
- throw std::runtime_error("Unknown dim is found at execution time for a backend that "
- "does not support dynamic tensor");
- }
- else
- {
- input_tensor->set_dynamic();
- }
+ input_tensor->set_dynamic();
}
}
+ input_tensor->setTensor(input);
}
- // TODO Move calling permute_fn.run() into executeImpl()
- assert(pre_fn);
- pre_fn->run();
+ assert(outputs.size() == _graph.getOutputs().size());
+ assert(outputs.size() == _output_tensors.size());
+ for (uint32_t n = 0; n < outputs.size(); ++n)
+ {
+ const auto output = outputs[n];
+ // assert(dst_tensor->buffer() != nullptr);
+ auto output_tensor = _output_tensors[n];
+ assert(output_tensor != nullptr);
+ output_tensor->setTensor(output);
+ }
executeImpl();
}
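
The convertShape() call above compares the caller's shape against the input tensor's original layout; for a 4-D tensor this is just a dimension permutation. A standalone illustration of that permutation (a conceptual analogue, not onert's convertShape):

#include <array>
#include <cstdio>

int main()
{
  std::array<int, 4> nhwc{1, 224, 224, 3};                      // N, H, W, C
  std::array<int, 4> nchw{nhwc[0], nhwc[3], nhwc[1], nhwc[2]};  // N, C, H, W
  std::printf("NCHW: %d %d %d %d\n", nchw[0], nchw[1], nchw[2], nchw[3]);
  return 0;
}
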
@@ -146,32 +108,50 @@ void ExecutorBase::execute(const IODescription &desc)
assert(_input_tensors.size() == desc.inputs.size());
for (uint32_t i = 0; i < _input_tensors.size(); ++i)
{
- // TODO Remove dynamic_cast
- auto tensor = std::dynamic_pointer_cast<backend::controlflow::UserTensor>(_input_tensors[i]);
- assert(tensor);
+ auto tensor = _input_tensors[i];
+
+ // TODO Check if (desc.inputs[i] == nullptr)
+ // TODO Better design for ITensor? (we need const_cast as ITensor is writable)
+ tensor->setUserTensor(static_cast<uint8_t *>(const_cast<void *>(desc.inputs[i]->buffer)),
+ desc.inputs[i]->size);
+
auto input_shape = desc.dynamic_input_shapes.find(ir::IOIndex{i});
if (input_shape != desc.dynamic_input_shapes.end())
{
tensor->set_dynamic();
tensor->setShape(input_shape->second);
+ /*
+ * Changes tensor shape and allocate memory since its shape was changed
+ * perhaps by nnfw_set_input_tensorinfo()
+ *
+ * Cases are:
+ * 1) static operand -> nnfw_set_input_tensorinfo() -> execute() -> execute()
+ * (a) (b)
+ *
+ * at (a), operand is static, tensor is static - memory dealloc is not needed
+ * (DynamicTensorManager cannot dealloc memory allocated by StaticTensorManager)
+ * at (b), operand is static, tensor is dynamic - memory dealloc is needed
+ *
+ * 2) dynamic operand -> nnfw_set_input_tensorinfo() -> execute() -> execute()
+ * (a) (b)
+ *
+ * at (a), operand is dynamic, tensor is dynamic - memory dealloc is not needed
+ * since it has not been allocated yet
+ * at (b), operand is dynamic, tensor is dynamic - memory dealloc is needed
+ */
+ tensor->applyShape(input_shape->second);
}
- // TODO Better design for ITensor? (we need const_cast as ITensor is writable)
- tensor->setBuffer(static_cast<uint8_t *>(const_cast<void *>(desc.inputs[i]->buffer)),
- desc.inputs[i]->size);
-
- handleDynamicInputTensor(ir::IOIndex{i}, desc);
}
assert(_output_tensors.size() == desc.outputs.size());
for (uint32_t i = 0; i < _output_tensors.size(); ++i)
{
- // TODO Remove dynamic_cast
- auto tensor = std::dynamic_pointer_cast<backend::controlflow::UserTensor>(_output_tensors[i]);
- assert(tensor);
+ auto tensor = _output_tensors[i];
+
+ if (desc.outputs[i] == nullptr)
+ throw std::runtime_error{"Output " + std::to_string(i) + "'s buffer is not set."};
+ tensor->setUserTensor(static_cast<uint8_t *>(desc.outputs[i]->buffer), desc.outputs[i]->size);
tensor->set_dynamic(); // It can't be resized but shape could change
- // TODO Better design for ITensor? (we need const_cast as ITensor is writable)
- tensor->setBuffer(static_cast<uint8_t *>(const_cast<void *>(desc.outputs[i]->buffer)),
- desc.outputs[i]->size);
}
executeImpl();
@@ -190,51 +170,13 @@ void ExecutorBase::execute(const IODescription &desc)
// set shape of outputDesc to tensor shape since tensor can be dynamic
const auto output_tensor_shape = _output_tensors[n]->getShape();
output.info.shape(
- convertShape(output_tensor_shape, _output_tensors[n]->layout(), output.layout));
- }
-}
-
-/**
- * @brief Changes tensor shape and allocate memory
- * if input shape was changed by nnfw_set_input_tensorinfo()
- *
- * @note Cases are:
- * 1) static operand -> nnfw_set_input_tensorinfo() -> execute() -> execute()
- * (a) (b)
- *
- * at (a), operand is static, tensor is static - memory dealloc is not needed
- * (DynamicTensorManager cannot dealloc memory allocated by StaticTensorManager)
- * at (b), operand is static, tensor is dynamic - memory dealloc is needed
- *
- * 2) dynamic operand -> nnfw_set_input_tensorinfo() -> execute() -> execute()
- * (a) (b)
- *
- * at (a), operand is dynamic, tensor is dynamic - memory dealloc is not needed
- * since it has not been allocated yet
- * at (b), operand is dynamic, tensor is dynamic - memory dealloc is needed
- */
-void ExecutorBase::handleDynamicInputTensor(ir::IOIndex io_ind, const IODescription &desc)
-{
- auto shape_sig_found = desc.dynamic_input_shapes.find(io_ind);
- if (shape_sig_found != desc.dynamic_input_shapes.end())
- {
- auto dyn_alloc_info = _input_to_dyn_alloc_info.find(_input_tensors[io_ind.value()]);
- if (dyn_alloc_info == _input_to_dyn_alloc_info.end())
- throw std::runtime_error("Unknown dim is found at execution time for a backend that "
- "does not support dynamic tensor");
-
- auto changed_input_shape = shape_sig_found->second;
- auto operand_ind = dyn_alloc_info->second.ind;
-
- auto dyn_tensor_manager = _input_tensors[io_ind.value()]->dynamic_tensor_manager();
- assert(dyn_tensor_manager);
- dyn_tensor_manager->applyShape(operand_ind, changed_input_shape);
+ convertShape(output_tensor_shape, _output_tensors[n]->layout(), output.layout));
}
}
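
A hedged caller-side sketch of the scenario described in the comment above, using the public nnfw C API (nnfw_set_input_tensorinfo before nnfw_run). The helper name is hypothetical and the nnfw_tensorinfo field names should be checked against nnfw.h; input/output buffers are assumed to be bound elsewhere via nnfw_set_input/nnfw_set_output.

#include <nnfw.h>

// Hypothetical helper: resize input 0 of an already-prepared session to a new
// height/width before running, mirroring case 1) in the comment above.
static NNFW_STATUS run_with_resized_input(nnfw_session *session, int h, int w)
{
  nnfw_tensorinfo ti{};
  ti.dtype = NNFW_TYPE_TENSOR_FLOAT32;
  ti.rank = 4;
  ti.dims[0] = 1; ti.dims[1] = h; ti.dims[2] = w; ti.dims[3] = 3;
  if (nnfw_set_input_tensorinfo(session, 0, &ti) != NNFW_STATUS_NO_ERROR)
    return NNFW_STATUS_ERROR;
  // First run: the tensor becomes dynamic and memory is allocated for the new shape.
  // Later runs: the previously allocated dynamic buffer is released before reallocation.
  return nnfw_run(session);
}
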
bool ExecutorBase::hasDynamicInput()
{
- for (auto &tensor : _input_tensors)
+ for (auto &&tensor : _input_tensors)
{
if (tensor->is_dynamic())
return true;
diff --git a/runtime/onert/core/src/exec/ExecutorBase.h b/runtime/onert/core/src/exec/ExecutorBase.h
index a13be7dbf..4f97de922 100644
--- a/runtime/onert/core/src/exec/ExecutorBase.h
+++ b/runtime/onert/core/src/exec/ExecutorBase.h
@@ -17,25 +17,20 @@
#ifndef __ONERT_EXEC_EXECUTOR_BASE_H__
#define __ONERT_EXEC_EXECUTOR_BASE_H__
-#include <mutex>
+#include "ExecutionObservee.h"
+#include "../backend/builtin/IOTensor.h"
+#include "../compiler/TensorRegistries.h"
-#include "IPermuteFunction.h"
-#include "Source.h"
-#include "exec/ExecutionObservers.h"
-#include "Sink.h"
-#include "ShapeConverter.h"
-#include "exec/IExecutor.h"
#include "compiler/LoweredGraph.h"
-#include "ir/LowerInfoMap.h"
-#include "backend/IConfig.h"
-#include "backend/Backend.h"
-#include "exec/ExecTime.h"
-#include "exec/IFunction.h"
-#include "backend/IDynamicTensorManager.h"
-#include "backend/ITensorManager.h"
-#include "exec/ExecutionObservee.h"
-#include "compiler/TensorRegistries.h"
-#include <list>
+#include "exec/IExecutor.h"
+#include "exec/IODescription.h"
+#include "ir/Graph.h"
+#include "ir/OperationIndexMap.h"
+#include "util/TracingCtx.h"
+
+#include <memory>
+#include <mutex>
+#include <vector>
namespace onert
{
@@ -51,26 +46,18 @@ public:
* @param tensor_builders Tensor builders that are currently used
*/
ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph,
- const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
- const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorRegistries &tensor_regs,
- backend::TensorManagerSet &&tensor_mgrs);
+ backend::BackendContexts &&backend_contexts,
+ const compiler::TensorRegistries &tensor_regs, const util::TracingCtx *tracing_ctx);
virtual ~ExecutorBase() = default;
- const ir::Graph &graph() final { return _graph; }
-
- /**
- * @brief Execute without IODescription
- *
- * @param src_tensor Tensor list that will be copied to input tensors of this
- * @param pre_fn The permutation function that copy from src_tensor to input tensors of this
- */
- void execute(const std::vector<std::shared_ptr<backend::ITensor>> &src_tensors,
- const std::shared_ptr<IPermuteFunction> &pre_fn);
+ const ir::Graph &graph() const final { return _graph; }
void execute(const IODescription &desc) final;
+ void execute(const std::vector<backend::IPortableTensor *> &inputs,
+ const std::vector<backend::IPortableTensor *> &outputs) override;
+
// Used only in Dataflow and Parallel Executors
void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>> ranks) final
{
@@ -81,17 +68,16 @@ public:
void addObserver(std::unique_ptr<IExecutionObserver> ref) { _subject.add(std::move(ref)); };
- const std::vector<std::shared_ptr<backend::ITensor>> &getInputTensors() const
+ const std::vector<backend::builtin::IOTensor *> &getInputTensors() const override
{
return _input_tensors;
}
- const std::vector<std::shared_ptr<backend::ITensor>> &getOutputTensors() const
+ const std::vector<backend::builtin::IOTensor *> &getOutputTensors() const override
{
return _output_tensors;
}
-
- const DynAllocInfoMap &getInputsDynamicAllocInfo() const { return _input_to_dyn_alloc_info; }
+ backend::BackendContexts &getBackendContexts() { return _backend_contexts; }
protected:
/**
@@ -103,16 +89,12 @@ protected:
ExecutionObservee _subject;
std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
std::unique_ptr<compiler::LoweredGraph> _lowered_graph;
+ backend::BackendContexts _backend_contexts;
const ir::Graph &_graph;
- std::vector<std::shared_ptr<backend::ITensor>> _input_tensors;
- std::vector<std::shared_ptr<backend::ITensor>> _output_tensors;
- DynAllocInfoMap _input_to_dyn_alloc_info;
- DynAllocInfoMap _output_to_dyn_alloc_info;
- backend::TensorManagerSet _tensor_mgrs;
+ std::vector<backend::builtin::IOTensor *> _input_tensors;
+ std::vector<backend::builtin::IOTensor *> _output_tensors;
std::mutex _mutex;
-
-private:
- void handleDynamicInputTensor(ir::IOIndex input_index, const IODescription &desc);
+ const util::TracingCtx *_tracing_ctx;
};
} // namespace exec
diff --git a/runtime/onert/core/src/exec/Executors.cc b/runtime/onert/core/src/exec/Executors.cc
new file mode 100644
index 000000000..8a1be3df4
--- /dev/null
+++ b/runtime/onert/core/src/exec/Executors.cc
@@ -0,0 +1,649 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Executors.h"
+
+#include "../backend/builtin/IOTensor.h"
+
+namespace
+{
+
+using namespace onert;
+
+int32_t find_input_index(const std::vector<ir::IODesc> &pkg_inputs,
+ const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+ const ir::IOIndex &io_index)
+{
+ for (size_t i = 0; i < pkg_inputs.size(); i++)
+ {
+ auto &input_desc = pkg_inputs[i];
+ if ((std::get<ir::ModelIndex>(input_desc) == model_index) &&
+ (std::get<ir::SubgraphIndex>(input_desc) == subg_index) &&
+ (std::get<ir::IOIndex>(input_desc) == io_index))
+ return static_cast<int32_t>(i);
+ }
+ return -1;
+}
+
+int32_t find_output_index(const std::vector<ir::IODesc> &pkg_outputs,
+ const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+ const ir::IOIndex &io_index)
+{
+ for (size_t i = 0; i < pkg_outputs.size(); i++)
+ {
+ auto &input_desc = pkg_outputs[i];
+ if ((std::get<ir::ModelIndex>(input_desc) == model_index) &&
+ (std::get<ir::SubgraphIndex>(input_desc) == subg_index) &&
+ (std::get<ir::IOIndex>(input_desc) == io_index))
+ return static_cast<int32_t>(i);
+ }
+ return -1;
+}
+
+} // namespace
+
+namespace onert
+{
+namespace exec
+{
+
+class Executors::EdgeTensor : public backend::builtin::IOTensor
+{
+public:
+ EdgeTensor(const ir::OperandInfo &info, ir::Layout layout)
+ : backend::builtin::IOTensor(info, layout), _buffer{nullptr}, _ref_count{0}
+ {
+ }
+ ~EdgeTensor() = default;
+
+ void allocate_buffer()
+ {
+ const auto total_size = orig_info().total_size();
+ _buffer = std::make_unique<uint8_t[]>(total_size);
+ _ref_count = 1;
+
+    // NOTE An executor's inputs/outputs are always IPortableTensor. If a backend's input/output
+    //      tensor does not inherit IPortableTensor, a Permute operation is inserted at compile
+    //      time so that all inputs/outputs become IPortableTensor.
+    //      This allows user buffers to be set as the executors' inputs/outputs.
+ setUserTensor(_buffer.get(), total_size);
+ }
+
+ void increase_ref() { _ref_count++; }
+
+ void decrease_ref()
+ {
+ assert(_ref_count > 0);
+ _ref_count--;
+ if (_ref_count == 0)
+ {
+ _buffer.reset();
+ setUserTensor(nullptr, orig_info().total_size());
+ }
+ }
+
+private:
+ std::unique_ptr<uint8_t[]> _buffer;
+ int32_t _ref_count;
+};
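
EdgeTensor above owns its intermediate buffer and releases it when the last consumer drops its reference. A standalone sketch of the same allocate/increase_ref/decrease_ref pattern (an analogue, not the class above):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <memory>

// Analogue of EdgeTensor's buffer lifetime: allocate() sets the count to 1,
// each additional consumer adds one, and the buffer is freed at zero.
class RefCountedBuffer
{
public:
  void allocate(std::size_t size)
  {
    _buffer = std::make_unique<uint8_t[]>(size);
    _ref_count = 1;
  }
  void increase_ref() { ++_ref_count; }
  void decrease_ref()
  {
    assert(_ref_count > 0);
    if (--_ref_count == 0)
      _buffer.reset(); // last user gone: release memory
  }

private:
  std::unique_ptr<uint8_t[]> _buffer;
  int32_t _ref_count = 0;
};
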
+
+void Executors::emplace(const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+ std::unique_ptr<IExecutor> exec)
+{
+ _executors.emplace(std::make_pair(model_index, subg_index), std::move(exec));
+}
+
+IExecutor *Executors::at(const ir::ModelIndex &model_index,
+ const ir::SubgraphIndex &subg_index) const
+{
+ return _executors.at(std::make_pair(model_index, subg_index)).get();
+}
+
+uint32_t Executors::inputSize() const { return _model_edges->pkg_inputs.size(); }
+
+uint32_t Executors::outputSize() const { return _model_edges->pkg_outputs.size(); }
+
+const ir::OperandInfo &Executors::inputInfo(const ir::IOIndex &index) const
+{
+ auto const desc = _model_edges->pkg_inputs[index.value()];
+ auto const model_index = std::get<0>(desc);
+ auto const subg_index = std::get<1>(desc);
+ auto const io_index = std::get<2>(desc);
+ auto const executor = at(model_index, subg_index);
+ return executor->getInputTensors().at(io_index.value())->orig_info();
+}
+
+const ir::OperandInfo &Executors::outputInfo(const ir::IOIndex &index) const
+{
+ auto const desc = _model_edges->pkg_outputs[index.value()];
+ auto const model_index = std::get<0>(desc);
+ auto const subg_index = std::get<1>(desc);
+ auto const io_index = std::get<2>(desc);
+ auto const executor = at(model_index, subg_index);
+ return executor->getOutputTensors().at(io_index.value())->orig_info();
+}
+
+// Allow only the following edges:
+//   m1 < m2, s1 == 0 and s2 == 0 for an edge 'm1:s1:o1 -> m2:s2:o2'
+void Executors::checkSupportedMultimodel() const
+{
+  // If the package includes a model with no connections, model_count is less than the real model
+  // count in the package. In that case this method throws an exception based on the model index:
+  // 1st model: input assumption
+  // Other models: edge assumption
+
+ // Assumption: edges
+ // m1 < m2, s1 == 0 and s2 == 0 if edge 'm1:s1:o1 -> m2:s2:o2'
+ for (auto &&edge : _model_edges->edges)
+ {
+ auto const model_from = std::get<ir::ModelIndex>(edge.from);
+ auto const model_to = std::get<ir::ModelIndex>(edge.to);
+ auto const subg_from = std::get<ir::SubgraphIndex>(edge.from);
+ auto const subg_to = std::get<ir::SubgraphIndex>(edge.to);
+
+ if (model_from.value() == model_to.value())
+ {
+ throw std::runtime_error{"Multi model's edge set has invalid edge"};
+ }
+
+ if ((model_from.value() > model_to.value()) || (subg_from != ir::SubgraphIndex{0}) ||
+ (subg_to != ir::SubgraphIndex{0}))
+ throw std::runtime_error{"NYI: Multi model execution for this edge set is not supported yet"};
+ }
+
+ // Assumption: package inputs
+  //  All inputs of the 1st model come from package inputs, given that m1 < m2 always holds
+ {
+ auto first_executor = at(ir::ModelIndex{0}, ir::SubgraphIndex{0});
+ auto search_first_model = [&](const ir::IOIndex &input_index) {
+ for (const auto &input : _model_edges->pkg_inputs)
+ {
+        if ((std::get<ir::ModelIndex>(input) == ir::ModelIndex{0}) &&
+            (std::get<ir::SubgraphIndex>(input) == ir::SubgraphIndex{0}) &&
+            (std::get<ir::IOIndex>(input) == input_index))
+ return true;
+ }
+
+ return false;
+ };
+
+ for (uint32_t i = 0; i < first_executor->getInputTensors().size(); i++)
+ {
+ if (!search_first_model(ir::IOIndex{i}))
+ throw std::runtime_error{"Cannot find 1st model's input buffer"};
+ }
+ }
+
+  // Check whether any nnpkg output is duplicated with an edge's `from`
+ for (const auto &edge : _model_edges->edges)
+ {
+ if (std::find(_model_edges->pkg_outputs.begin(), _model_edges->pkg_outputs.end(), edge.from) !=
+ _model_edges->pkg_outputs.end())
+ {
+ throw std::runtime_error{"Multi model execution does not support duplicating nnpkg outputs "
+ "with `from` of edges yet"};
+ }
+ }
+}
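
A standalone sketch of the edge rule enforced above (m1 < m2 and both subgraph indices equal to 0), applied to plain integer triples rather than ir::IODesc; the struct and function names are illustrative.

#include <cstdint>
#include <stdexcept>

struct Edge
{
  // model, subgraph, io index of `from` and `to` (plain ints instead of ir::IODesc)
  uint16_t from_model, from_subg;
  uint32_t from_io;
  uint16_t to_model, to_subg;
  uint32_t to_io;
};

// Mirrors checkSupportedMultimodel()'s edge assumption: only forward edges
// between primary subgraphs are accepted.
void check_edge(const Edge &e)
{
  if (e.from_model == e.to_model)
    throw std::runtime_error{"invalid edge: connects a model to itself"};
  if (e.from_model > e.to_model || e.from_subg != 0 || e.to_subg != 0)
    throw std::runtime_error{"unsupported edge: only m1 < m2 between primary subgraphs"};
}

// check_edge({0, 0, 0, 1, 0, 0}); // OK: model 0, output 0 -> model 1, input 0
// check_edge({1, 0, 0, 0, 0, 0}); // throws: backward edge
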
+
+void Executors::createEdgeQuantLayers()
+{
+ if (_is_created_edge_quant_layers)
+ {
+ return;
+ }
+
+ // Create EdgeTensor for edges between executors
+ for (const auto &pair : _edge_map)
+ {
+ const auto &from_iodesc = pair.first;
+ const auto &from_model_index = std::get<ir::ModelIndex>(from_iodesc);
+ const auto &from_subg_index = std::get<ir::SubgraphIndex>(from_iodesc);
+ const auto &from_io_index = std::get<ir::IOIndex>(from_iodesc);
+
+ const auto from_executor = _executors.at({from_model_index, from_subg_index}).get();
+ const auto from_tensor = from_executor->getOutputTensors().at(from_io_index.value());
+
+ const auto &from_info = from_tensor->orig_info();
+ const auto from_layout = from_tensor->orig_layout();
+ _edge_tensors[from_iodesc] = std::make_unique<EdgeTensor>(from_info, from_layout);
+ }
+
+ // Append type-aware quantization layer for edges between executors
+ for (const auto &executor_pair : _executors)
+ {
+ const auto &executor_index = executor_pair.first;
+ const auto &model_index = executor_index.first;
+ const auto &subg_index = executor_index.second;
+
+ std::vector<backend::ITensor *> inputs;
+ std::vector<backend::ITensor *> outputs;
+ for (const auto &pair : _edge_map)
+ {
+ const auto &from_iodesc = pair.first;
+ if (std::get<ir::ModelIndex>(from_iodesc) == model_index &&
+ std::get<ir::SubgraphIndex>(from_iodesc) == subg_index)
+ {
+ const auto from_tensor = _edge_tensors[from_iodesc].get();
+ const auto &to_list = pair.second;
+
+ for (const auto &to_iodesc : to_list)
+ {
+ const auto &to_model_index = std::get<ir::ModelIndex>(to_iodesc);
+ const auto &to_subg_index = std::get<ir::SubgraphIndex>(to_iodesc);
+ const auto &to_io_index = std::get<ir::IOIndex>(to_iodesc);
+
+ const auto to_executor = _executors.at({to_model_index, to_subg_index}).get();
+ const auto to_tensor = to_executor->getInputTensors().at(to_io_index.value());
+
+ // TODO Unify tensors with the same `from` tensor and same type
+ if (from_tensor->data_type() != to_tensor->data_type())
+ {
+ assert(inputs.size() == outputs.size());
+ const auto &to_info =
+ to_executor->getInputTensors().at(to_io_index.value())->orig_info();
+ const auto to_layout = to_tensor->orig_layout();
+ inputs.emplace_back(from_tensor);
+
+ auto type_aware_quant_tensor = std::make_unique<EdgeTensor>(to_info, to_layout);
+ outputs.emplace_back(type_aware_quant_tensor.get());
+
+ _edge_quant_tensors[to_iodesc] = std::move(type_aware_quant_tensor);
+ }
+ }
+ }
+ }
+
+ auto layer = std::make_unique<PermuteLayer>(inputs, outputs);
+ layer->prepare();
+ _edge_quant_layers[{model_index, subg_index}] = std::move(layer);
+ }
+
+ _is_created_edge_quant_layers = true;
+}
+
+void Executors::CreatePkgIOTensors(const IODescription &desc)
+{
+ for (const auto &pkg_input : _model_edges->pkg_inputs)
+ {
+ // Create IOTensor for nnpkg inputs
+ const auto &model_index = std::get<ir::ModelIndex>(pkg_input);
+ const auto &subg_index = std::get<ir::SubgraphIndex>(pkg_input);
+ const auto &io_index = std::get<ir::IOIndex>(pkg_input);
+ const auto input_pkg_index =
+ find_input_index(_model_edges->pkg_inputs, model_index, subg_index, io_index);
+ if (input_pkg_index == -1)
+ throw std::runtime_error{"Cannot find multi model input index"};
+ auto input_desc = desc.inputs[input_pkg_index].get();
+ _pkg_input_tensors[pkg_input] =
+ std::make_unique<backend::builtin::IOTensor>(input_desc->info, input_desc->layout);
+ }
+
+ for (const auto &pkg_output : _model_edges->pkg_outputs)
+ {
+ // Create IOTensor for nnpkg outputs
+ const auto &model_index = std::get<ir::ModelIndex>(pkg_output);
+ const auto &subg_index = std::get<ir::SubgraphIndex>(pkg_output);
+ const auto &io_index = std::get<ir::IOIndex>(pkg_output);
+ const auto output_pkg_index =
+ find_output_index(_model_edges->pkg_outputs, model_index, subg_index, io_index);
+ if (output_pkg_index == -1)
+ throw std::runtime_error{"Cannot find multi model output index"};
+ auto output_desc = desc.outputs[output_pkg_index].get();
+ _pkg_output_tensors[pkg_output] =
+ std::make_unique<backend::builtin::IOTensor>(output_desc->info, output_desc->layout);
+ }
+}
+
+void Executors::createPkgIOQuantLayers(const IODescription &desc)
+{
+ // Append type-aware quantization layer for nnpkg inputs/outputs between executors
+ for (const auto &pair : _executors)
+ {
+ const auto &executor_index = pair.first;
+ const auto &model_index = executor_index.first;
+ const auto &subg_index = executor_index.second;
+ const auto executor = pair.second.get();
+
+ // Find pkg inputs of current executor
+ std::vector<ir::IODesc> pkg_inputs;
+ for (const auto &pkg_input : _model_edges->pkg_inputs)
+ {
+ if (std::get<ir::ModelIndex>(pkg_input) == model_index &&
+ std::get<ir::SubgraphIndex>(pkg_input) == subg_index)
+ {
+ pkg_inputs.emplace_back(pkg_input);
+ }
+ }
+ std::vector<backend::ITensor *> src_tensors;
+ std::vector<backend::ITensor *> dst_tensors;
+ for (const auto &pkg_input : pkg_inputs)
+ {
+ const auto &io_index = std::get<ir::IOIndex>(pkg_input);
+ const auto input_pkg_index =
+ find_input_index(_model_edges->pkg_inputs, model_index, subg_index, io_index);
+ if (input_pkg_index == -1)
+ throw std::runtime_error{"Cannot find multi model input index"};
+ auto input_desc = desc.inputs[input_pkg_index].get();
+
+ // Create EdgeTensor for nnpkg input if type is different
+ const auto input_tensor =
+ executor->getInputTensors().at(std::get<ir::IOIndex>(pkg_input).value());
+ const auto &orig_info = input_tensor->orig_info();
+ if (input_desc->info.typeInfo().type() != input_tensor->orig_info().typeInfo().type())
+ {
+ const auto orig_layout = input_tensor->orig_layout();
+ auto pkg_input_edge_tensor = std::make_unique<EdgeTensor>(orig_info, orig_layout);
+ _pkg_input_quant_tensors[pkg_input] = std::move(pkg_input_edge_tensor);
+
+ // Append type-aware quantization layer's inputs/outputs
+ src_tensors.emplace_back(_pkg_input_tensors[pkg_input].get());
+ dst_tensors.emplace_back(_pkg_input_quant_tensors[pkg_input].get());
+ }
+ }
+
+ // Create type-aware quantization layer for nnpkg inputs
+ auto pkg_input_layer = std::make_unique<PermuteLayer>(src_tensors, dst_tensors);
+ pkg_input_layer->prepare();
+ _pkg_input_quant_layers[{model_index, subg_index}] = std::move(pkg_input_layer);
+
+ // Find pkg outputs of current executor
+ std::vector<ir::IODesc> pkg_outputs;
+ for (const auto &pkg_output : _model_edges->pkg_outputs)
+ {
+ if (std::get<ir::ModelIndex>(pkg_output) == model_index &&
+ std::get<ir::SubgraphIndex>(pkg_output) == subg_index)
+ {
+ pkg_outputs.emplace_back(pkg_output);
+ }
+ }
+ src_tensors.clear();
+ dst_tensors.clear();
+ // Create Tensors of nnpkg outputs for type-aware quantization
+ for (const auto &pkg_output : pkg_outputs)
+ {
+ const auto &io_index = std::get<ir::IOIndex>(pkg_output);
+ const auto output_pkg_index =
+ find_output_index(_model_edges->pkg_outputs, model_index, subg_index, io_index);
+ if (output_pkg_index == -1)
+ throw std::runtime_error{"Cannot find multi model output index"};
+ auto output_desc = desc.outputs[output_pkg_index].get();
+
+ // Create EdgeTensor for nnpkg output if type is different
+ const auto output_tensor =
+ executor->getOutputTensors().at(std::get<ir::IOIndex>(pkg_output).value());
+ const auto &orig_info = output_tensor->orig_info();
+ if (output_desc->info.typeInfo().type() != output_tensor->orig_info().typeInfo().type())
+ {
+ const auto orig_layout = output_tensor->orig_layout();
+ auto pkg_output_edge_tensor = std::make_unique<EdgeTensor>(orig_info, orig_layout);
+ _pkg_output_quant_tensors[pkg_output] = std::move(pkg_output_edge_tensor);
+
+ // Append type-aware quantization layer's inputs/outputs
+ src_tensors.emplace_back(_pkg_output_quant_tensors[pkg_output].get());
+ dst_tensors.emplace_back(_pkg_output_tensors[pkg_output].get());
+ }
+ }
+
+ // Create type-aware quantization layer for nnpkg outputs
+ auto pkg_output_layer = std::make_unique<PermuteLayer>(src_tensors, dst_tensors);
+ pkg_output_layer->prepare();
+ _pkg_output_quant_layers[{model_index, subg_index}] = std::move(pkg_output_layer);
+ }
+}
+
+void Executors::execute(const IODescription &desc)
+{
+ // Check supported multi model package
+ checkSupportedMultimodel();
+
+ // TODO Move creating type-aware quantization layers for edges in compilation stage
+ createEdgeQuantLayers();
+
+ // TODO Create IOTensors only once and recreate them only if nnpkg info changes
+ CreatePkgIOTensors(desc);
+
+ // TODO Create type-aware quantization layers only once and recreate them only if type changes
+ createPkgIOQuantLayers(desc);
+
+ // TODO Find better way to schedule order of executors
+ auto const model_count = modelCount();
+
+ auto find_from = [&](const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+ const ir::IOIndex &io_index) {
+ for (const auto &edge : _model_edges->edges)
+ {
+ if ((std::get<ir::ModelIndex>(edge.to) == model_index) &&
+ (std::get<ir::SubgraphIndex>(edge.to) == subg_index) &&
+ (std::get<ir::IOIndex>(edge.to) == io_index))
+ return edge.from;
+ }
+
+ throw std::runtime_error{"Cannot find edge for model input"};
+ };
+
+ // Execute each model
+  // NOTE It may be better to use a vector instead of an unordered_map for _executors
+ for (auto model_index = ir::ModelIndex{0}; model_index.value() < model_count; model_index++)
+ {
+ // Find executor
+ auto executor = at(model_index, ir::SubgraphIndex{0});
+
+ // Set IOTensors
+ // TODO Set internal IOTensors only once
+ std::vector<backend::IPortableTensor *> inputs_inter;
+ std::vector<backend::IPortableTensor *> outputs_inter;
+ const auto &input_tensors = executor->getInputTensors();
+ const auto &output_tensors = executor->getOutputTensors();
+ auto const input_size = input_tensors.size();
+ auto const output_size = output_tensors.size();
+ inputs_inter.resize(input_size);
+ outputs_inter.resize(output_size);
+
+ // Set inputs of executor
+ // TODO Create layer to allocate/deallocate buffers of EdgeTensor for each executor
+ for (uint32_t i = 0; i < input_size; i++)
+ {
+ const auto input_pkg_index = find_input_index(_model_edges->pkg_inputs, model_index,
+ ir::SubgraphIndex{0}, ir::IOIndex{i});
+ const auto input_io_desc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
+ if (input_pkg_index != -1)
+ {
+ // Allocate type-aware quantization tensors for nnpkg inputs and set internal tensors
+ if (_pkg_input_quant_tensors.find(input_io_desc) != _pkg_input_quant_tensors.end())
+ {
+ _pkg_input_quant_tensors[input_io_desc]->allocate_buffer();
+
+ inputs_inter[i] = _pkg_input_quant_tensors[input_io_desc].get();
+ }
+ else
+ {
+ inputs_inter[i] = _pkg_input_tensors[input_io_desc].get();
+ }
+
+ // Set buffer of IOTensor
+ auto input_desc = desc.inputs[input_pkg_index].get();
+ // TODO Remove const_cast (we need const_cast as ITensor is writable)
+ _pkg_input_tensors[input_io_desc]->setUserTensor(
+ reinterpret_cast<uint8_t *>(const_cast<void *>(input_desc->buffer)), input_desc->size);
+ }
+ else
+ {
+ auto from_iodesc = find_from(model_index, ir::SubgraphIndex{0}, ir::IOIndex{i});
+ const auto &from_model_index = std::get<ir::ModelIndex>(from_iodesc);
+ const auto &from_subg_index = std::get<ir::SubgraphIndex>(from_iodesc);
+ const auto &from_ioindex = std::get<ir::IOIndex>(from_iodesc).value();
+
+      // Only sequential execution of models is supported
+ assert(from_model_index.value() < model_index.value());
+ assert(from_subg_index.value() == 0);
+ const auto from_executor = _executors.at({from_model_index, from_subg_index}).get();
+ const auto to_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
+ if (_edge_quant_tensors.find(to_iodesc) == _edge_quant_tensors.end())
+ {
+ inputs_inter[i] = from_executor->getOutputTensors().at(from_ioindex);
+ }
+ else
+ {
+ inputs_inter[i] = _edge_quant_tensors.at(to_iodesc).get();
+ }
+ assert(inputs_inter[i]->buffer() != nullptr);
+ }
+ }
+
+ // Set outputs of executor
+ for (uint32_t i = 0; i < output_size; i++)
+ {
+ const auto output_pkg_index = find_output_index(_model_edges->pkg_outputs, model_index,
+ ir::SubgraphIndex{0}, ir::IOIndex{i});
+ const auto output_io_desc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
+ if (output_pkg_index != -1)
+ {
+ // Allocate type-aware quantization tensors for nnpkg outputs and set internal tensors
+ if (_pkg_output_quant_tensors.find(output_io_desc) != _pkg_output_quant_tensors.end())
+ {
+ _pkg_output_quant_tensors[output_io_desc]->allocate_buffer();
+
+ outputs_inter[i] = _pkg_output_quant_tensors[output_io_desc].get();
+ }
+ else
+ {
+ outputs_inter[i] = _pkg_output_tensors[output_io_desc].get();
+ }
+
+ // Set buffer of IOTensor
+ auto output_desc = desc.outputs[output_pkg_index].get();
+ _pkg_output_tensors[output_io_desc]->setUserTensor(
+ reinterpret_cast<uint8_t *>(output_desc->buffer), output_desc->size);
+ }
+ else
+ {
+ // Allocate buffer of `from` tensors
+ const auto from_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
+ _edge_tensors[from_iodesc]->allocate_buffer();
+ outputs_inter[i] = _edge_tensors[from_iodesc].get();
+
+ // Allocate buffer of tensors for type-aware quantization
+ for (const auto &to_iodesc : _edge_map[from_iodesc])
+ {
+ _edge_tensors[from_iodesc]->increase_ref();
+ if (_edge_quant_tensors.find(to_iodesc) != _edge_quant_tensors.end())
+ {
+ auto type_aware_quant_tensor = _edge_quant_tensors.at(to_iodesc).get();
+ type_aware_quant_tensor->allocate_buffer();
+
+ _edge_tensors[from_iodesc]->decrease_ref();
+ }
+ }
+ }
+ }
+
+ _pkg_input_quant_layers[{model_index, ir::SubgraphIndex{0}}]->run();
+
+ executor->execute(inputs_inter, outputs_inter);
+
+ _edge_quant_layers[{model_index, ir::SubgraphIndex{0}}]->run();
+ _pkg_output_quant_layers[{model_index, ir::SubgraphIndex{0}}]->run();
+
+ // Release input buffers that are no longer needed
+ for (uint32_t i = 0; i < input_size; i++)
+ {
+ const auto input_pkg_index = find_input_index(_model_edges->pkg_inputs, model_index,
+ ir::SubgraphIndex{0}, ir::IOIndex{i});
+
+ const auto to_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
+ if (input_pkg_index == -1)
+ {
+ if (_edge_quant_tensors.find(to_iodesc) != _edge_quant_tensors.end())
+ {
+          // Decrease the reference count of the type-aware quantization tensor if the input
+          // tensor is that tensor
+ const auto to_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
+ if (_edge_quant_tensors.find(to_iodesc) != _edge_quant_tensors.end())
+ {
+ _edge_quant_tensors[to_iodesc]->decrease_ref();
+ }
+ }
+ else
+ {
+ // Decrease reference count of `from` tensor if input tensor is the `from` tensor
+ const auto from_iodesc = find_from(model_index, ir::SubgraphIndex{0}, ir::IOIndex{i});
+ _edge_tensors[from_iodesc]->decrease_ref();
+
+ // Decrease reference count of nnpkg inputs
+ if (_pkg_input_quant_tensors.find(to_iodesc) != _pkg_input_quant_tensors.end())
+ {
+ _pkg_input_quant_tensors[to_iodesc]->decrease_ref();
+ }
+ }
+ }
+ }
+
+    // Release output buffers if they are no longer used by other executors because of
+    // type-aware quantization
+    // FIXME Revisit this once tensors for type-aware quantization are unified for the same `from` tensor and type
+ for (uint32_t i = 0; i < output_size; i++)
+ {
+ auto from_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
+
+ // Check if other executors will use the buffer of edge tensor
+ const auto &to_list = _edge_map[from_iodesc];
+ if (to_list.size() == 0)
+ {
+ // This condition means `from_iodesc` tensor is an output of nnpkg
+ continue;
+ }
+
+ bool to_be_release =
+ !std::any_of(to_list.begin(), to_list.end(), [&](const ir::IODesc &to_iodesc) {
+ // This condition means another executor uses the buffer of edge tensor
+ return _edge_quant_tensors.find(to_iodesc) == _edge_quant_tensors.end();
+ });
+
+ if (to_be_release)
+ {
+ // This edge tensor's buffer won't be used in other executors
+ // Tensors for type-aware quantization take over the role of this edge tensor instead
+ _edge_tensors[from_iodesc]->decrease_ref();
+ }
+
+ // Decrease reference count of nnpkg outputs
+ if (_pkg_output_quant_tensors.find(from_iodesc) != _pkg_output_quant_tensors.end())
+ {
+ _pkg_output_quant_tensors[from_iodesc]->decrease_ref();
+ }
+ }
+ }
+}
+
+// modelCount() iterates over _executors.
+// It assumes that the Compiler generates an Executor for every model and that _executors contains
+// all generated Executors.
+// If the nnpackage includes model(s) with no connections and the Compiler does not
+// generate Executors for them, modelCount() returns a smaller value than the real model count.
+uint16_t Executors::modelCount() const
+{
+ uint16_t model_count = 0;
+ for (; _executors.find(std::make_pair(ir::ModelIndex{model_count}, ir::SubgraphIndex{0})) !=
+ _executors.end();
+ model_count++)
+ ;
+
+ return model_count;
+}
+
+} // namespace exec
+} // namespace onert
diff --git a/runtime/onert/core/src/exec/Executors.h b/runtime/onert/core/src/exec/Executors.h
new file mode 100644
index 000000000..ac7489186
--- /dev/null
+++ b/runtime/onert/core/src/exec/Executors.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_EXECUTORS_H__
+#define __ONERT_EXEC_EXECUTORS_H__
+
+#include "exec/IExecutors.h"
+#include "ir/NNPkg.h"
+#include "IPermuteFunction.h"
+
+namespace std
+{
+
+template <> struct hash<std::pair<::onert::ir::ModelIndex, ::onert::ir::SubgraphIndex>>
+{
+ size_t
+ operator()(const std::pair<::onert::ir::ModelIndex, ::onert::ir::SubgraphIndex> &pair) const
+ noexcept
+ {
+ return (hash<uint32_t>()(pair.first.value()) << 16) ^ hash<uint32_t>()(pair.second.value());
+ }
+};
+
+} // namespace std
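
The specialization above shifts the model index into the upper bits and XORs in the subgraph index so the pair can key an unordered_map. A standalone sketch of the same combiner for a plain (model, subgraph) pair; the struct name and the map contents are illustrative.

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <functional>
#include <unordered_map>
#include <utility>

// Same shift-and-xor combiner as above, for a plain (model, subgraph) pair.
// Both indices fit in 16 bits in practice, so shifting by 16 keeps them disjoint.
struct PairHash
{
  std::size_t operator()(const std::pair<uint32_t, uint32_t> &p) const noexcept
  {
    return (std::hash<uint32_t>()(p.first) << 16) ^ std::hash<uint32_t>()(p.second);
  }
};

int main()
{
  std::unordered_map<std::pair<uint32_t, uint32_t>, const char *, PairHash> names;
  names[{0, 0}] = "model 0, primary subgraph";
  names[{1, 0}] = "model 1, primary subgraph";
  std::printf("%s\n", names.at({1, 0}));
  return 0;
}
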
+
+namespace onert
+{
+namespace exec
+{
+
+/**
+ * @brief Class to gather executors
+ */
+class Executors : public IExecutors
+{
+public:
+ Executors(void) = delete;
+ Executors(std::unique_ptr<ir::ModelEdges> model_edges)
+ : _executors{}, _model_edges{std::move(model_edges)}, _edge_quant_layers{},
+ _edge_quant_tensors{}, _edge_tensors{}, _is_created_edge_quant_layers{false},
+ _pkg_input_quant_layers{}, _pkg_output_quant_layers{}, _pkg_input_quant_tensors{},
+ _pkg_output_quant_tensors{}, _pkg_input_tensors{}, _pkg_output_tensors{}
+ {
+ for (const auto &edge : _model_edges->edges)
+ {
+ _edge_map[edge.from].emplace_back(edge.to);
+ }
+ }
+ Executors(const Executors &) = delete;
+ Executors(Executors &&) = default;
+ ~Executors() = default;
+
+ // TODO Use Executor index
+ void emplace(const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+ std::unique_ptr<IExecutor> exec) override;
+
+ IExecutor *at(const ir::ModelIndex &model_index,
+ const ir::SubgraphIndex &subg_index) const override;
+
+ uint32_t inputSize() const override;
+
+ uint32_t outputSize() const override;
+
+ const ir::OperandInfo &inputInfo(const ir::IOIndex &index) const override;
+
+ const ir::OperandInfo &outputInfo(const ir::IOIndex &index) const override;
+
+ void execute(const IODescription &desc) override;
+
+private:
+ void checkSupportedMultimodel() const;
+ void createEdgeQuantLayers();
+ void CreatePkgIOTensors(const IODescription &desc);
+ void createPkgIOQuantLayers(const IODescription &desc);
+ uint16_t modelCount() const;
+
+private:
+ // TODO Remove this class
+ class PermuteLayer : public exec::IPermuteFunction
+ {
+ public:
+ PermuteLayer(const std::vector<backend::ITensor *> &inputs,
+ const std::vector<backend::ITensor *> &outputs)
+ {
+ assert(inputs.size() == outputs.size());
+ _src_tensors = inputs;
+ _dst_tensors = outputs;
+ }
+ virtual ~PermuteLayer() {}
+ void optimize() override {}
+ };
+
+ class EdgeTensor;
+
+private:
+ std::unordered_map<std::pair<ir::ModelIndex, ir::SubgraphIndex>, std::unique_ptr<IExecutor>>
+ _executors;
+
+  // NOTE _model_edges may use a different struct type depending on the executor implementation
+ std::unique_ptr<ir::ModelEdges> _model_edges;
+ std::unordered_map<ir::IODesc, std::vector<ir::IODesc>> _edge_map;
+
+ /**
+ * @brief Type-aware quantization layers for edges between executors
+ *
+ */
+ // TODO Move variables related to type-aware quantization for edges into compilation stage
+ // TODO Replace PermuteLayer with backend::builtin::kernel::PermuteLayer
+ std::unordered_map<std::pair<ir::ModelIndex, ir::SubgraphIndex>, std::unique_ptr<PermuteLayer>>
+ _edge_quant_layers;
+
+ /**
+ * @brief Tensors for type-aware quantization of edges
+ * Key: `to` IODesc, Value: EdgeTensor
+ */
+ //
+  // Q: Why is the key the `to` IODesc?
+  // A: These tensors are currently created depending on the type of `to`
+ // TODO Unify tensors with the same `from` tensor and same type
+ // NOTE The incomplete type 'EdgeTensor' cannot be declared as unique_ptr.
+ std::unordered_map<ir::IODesc, std::shared_ptr<EdgeTensor>> _edge_quant_tensors;
+
+ /**
+ * @brief Tensors for edges between executors that are not related to type-aware quantization
+ * Key: `from` IODesc, Value: EdgeTensor
+ */
+  // Q: Why is the key the `from` IODesc?
+  // A: A `from` can be connected to multiple `to`s
+ // NOTE The incomplete type 'EdgeTensor' cannot be declared as unique_ptr.
+ std::unordered_map<ir::IODesc, std::shared_ptr<EdgeTensor>> _edge_tensors;
+ /**
+ * @brief Whether type-aware quantization layers for edges between executors are created
+ *
+ */
+ // TODO Remove this member after the creation of type-aware quantization layers for edges
+ // is moved into compilation stage
+ bool _is_created_edge_quant_layers;
+
+ // TODO Replace PermuteLayer with backend::builtin::kernel::PermuteLayer
+ std::unordered_map<std::pair<ir::ModelIndex, ir::SubgraphIndex>, std::unique_ptr<PermuteLayer>>
+ _pkg_input_quant_layers;
+ // TODO Replace PermuteLayer with backend::builtin::kernel::PermuteLayer
+ std::unordered_map<std::pair<ir::ModelIndex, ir::SubgraphIndex>, std::unique_ptr<PermuteLayer>>
+ _pkg_output_quant_layers;
+ // Edge tensors of nnpkg inputs/outputs for type-aware quantization
+ std::unordered_map<ir::IODesc, std::shared_ptr<EdgeTensor>> _pkg_input_quant_tensors;
+ std::unordered_map<ir::IODesc, std::shared_ptr<EdgeTensor>> _pkg_output_quant_tensors;
+ // IOTensors for user buffer
+ std::unordered_map<ir::IODesc, std::unique_ptr<backend::builtin::IOTensor>> _pkg_input_tensors;
+ std::unordered_map<ir::IODesc, std::unique_ptr<backend::builtin::IOTensor>> _pkg_output_tensors;
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_EXECUTORS_H__
diff --git a/runtime/onert/core/src/exec/FunctionSequence.cc b/runtime/onert/core/src/exec/FunctionSequence.cc
index fb31f7582..578123a54 100644
--- a/runtime/onert/core/src/exec/FunctionSequence.cc
+++ b/runtime/onert/core/src/exec/FunctionSequence.cc
@@ -16,8 +16,6 @@
#include "exec/FunctionSequence.h"
-#include "ir/Operation.h"
-#include "backend/IDynamicTensorManager.h"
#include "backend/ITensorRegistry.h"
#include "util/logging.h"
@@ -28,19 +26,19 @@ namespace exec
void FunctionSequence::run()
{
- // TODO Find out when `_enable_dynamic_shape_inferer` is true but `_dynamic_tensor_ctx` is false
if (_enable_dynamic_shape_inferer && _dynamic_tensor_ctx)
{
- if (_dynamic_tensor_ctx->op_seq->size() != _functions.size())
- throw std::runtime_error("operation and functions should be mapped one by one");
+    // The acl_cl and acl_neon backends don't support dynamic shapes.
+    // _dynamic_tensor_ctx is always nullptr for acl_cl and acl_neon,
+    // so those two backends cannot reach here.
+
+ // Do dynamic shape inference
+ _dynamic_tensor_ctx->op->accept(*_dynamic_tensor_ctx->dynamic_shape_inferer);
- auto op_seq_iter = _dynamic_tensor_ctx->op_seq->begin();
for (const auto &function : _functions)
{
- // set shape of output and allocate memory when needed
- auto &op = _dynamic_tensor_ctx->operations->at(*op_seq_iter);
- op.accept(*_dynamic_tensor_ctx->dynamic_shape_inferer);
-
+      // NOTE The function could itself be a FunctionSequence, so we do this
+      // TODO Remove this or handle it recursively
auto *sub_func_seq = dynamic_cast<FunctionSequence *>(function.get());
if (sub_func_seq != nullptr)
{
@@ -50,22 +48,12 @@ void FunctionSequence::run()
// run kernel
function->run();
-
- // deallocate input tensors which is no longer used
- _dynamic_tensor_ctx->dynamic_tensor_manager->deallocInput(*op_seq_iter);
-
- op_seq_iter++;
}
}
else
{
for (const auto &function : _functions)
{
- auto *sub_func_seq = dynamic_cast<FunctionSequence *>(function.get());
- if (sub_func_seq != nullptr)
- {
- sub_func_seq->enableDynamicShapeInferer(false);
- }
function->run();
}
}
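
The dynamic_cast above lets a sequence notice that one of its functions is itself a FunctionSequence before running it. A standalone sketch of that composite pattern (an analogue with illustrative names, not onert's classes); the flag propagation stands in for what the removed enableDynamicShapeInferer(false) call used to do.

#include <memory>
#include <vector>

// Analogue of IFunction / FunctionSequence: a sequence of runnables that
// detects nested sequences via dynamic_cast before running them.
struct Function
{
  virtual ~Function() = default;
  virtual void run() = 0;
};

struct Sequence : Function
{
  void append(std::unique_ptr<Function> f) { _functions.push_back(std::move(f)); }
  void enable_extra_step(bool enable) { _extra_step = enable; }
  void run() override
  {
    for (const auto &function : _functions)
    {
      // Propagate the setting into nested sequences before running them
      if (auto *sub = dynamic_cast<Sequence *>(function.get()))
        sub->enable_extra_step(_extra_step);
      function->run();
    }
  }

private:
  std::vector<std::unique_ptr<Function>> _functions;
  bool _extra_step = false;
};
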
diff --git a/runtime/onert/core/src/exec/IPermuteFunction.cc b/runtime/onert/core/src/exec/IPermuteFunction.cc
new file mode 100644
index 000000000..9d548e6dc
--- /dev/null
+++ b/runtime/onert/core/src/exec/IPermuteFunction.cc
@@ -0,0 +1,320 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IPermuteFunction.h"
+
+#include <cker/operation/Quantize.h>
+#include <cker/operation/Dequantize.h>
+#include "backend/IPortableTensor.h"
+#include "exec/IFunction.h"
+#include "ir/Index.h"
+#include "ir/Shape.h"
+#include <memory>
+#include <misc/polymorphic_downcast.h>
+#include <typeinfo>
+#include "util/Utils.h"
+#include <vector>
+#include <unordered_map>
+
+namespace
+{
+using namespace onert;
+
+inline nnfw::cker::Shape getShape(const backend::ITensor *tensor)
+{
+ const ir::Shape shape = tensor->getShape();
+
+ assert(tensor->layout() == ir::Layout::NHWC);
+
+ auto rank = shape.rank();
+ nnfw::cker::Shape ret(rank);
+ auto data = ret.DimsData();
+ for (int i = 0; i < rank; ++i)
+ {
+ data[i] = shape.dim(i);
+ }
+ return ret;
+}
+
+// Quantize per element
+template <typename InputT, typename OutputT>
+void elementwiseQuantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
+{
+ const auto scale = dst_tensor->data_scale();
+ const auto zero_point = dst_tensor->data_zero_point();
+
+ int min_val = std::numeric_limits<OutputT>::min();
+ int max_val = std::numeric_limits<OutputT>::max();
+
+ auto loop_shape = src_tensor->getShape();
+ const auto src_layout = src_tensor->layout();
+ const auto dst_layout = dst_tensor->layout();
+ const bool is_permutation = src_layout != dst_layout && loop_shape.rank() == 4;
+ ShapeLoop(loop_shape, [&](const onert::ir::Coordinates &coords) {
+ const InputT *input_data =
+ reinterpret_cast<const InputT *>(src_tensor->buffer() + src_tensor->calcOffset(coords));
+ int32_t unclamped = static_cast<int32_t>(round(*input_data / scale)) + zero_point;
+ int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
+
+ ir::Coordinates dst_coords =
+ is_permutation ? ir::convertCoordinates(coords, src_layout, dst_layout) : coords;
+ OutputT *output_data =
+ reinterpret_cast<OutputT *>(dst_tensor->buffer() + dst_tensor->calcOffset(dst_coords));
+ *output_data = clamped;
+ });
+}
+
+// TODO Optimize the case where tensors have the same layout
+template <typename InputT, typename OutputT>
+void quantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
+{
+ if (!src_tensor->has_padding() && !dst_tensor->has_padding() &&
+ src_tensor->layout() == dst_tensor->layout() && !src_tensor->is_dynamic())
+ {
+ assert(!dst_tensor->is_dynamic());
+
+ // Call optimized neon kernel
+ nnfw::cker::Quantize(getShape(src_tensor),
+ reinterpret_cast<const InputT *>(src_tensor->buffer()),
+ getShape(dst_tensor), reinterpret_cast<OutputT *>(dst_tensor->buffer()),
+ dst_tensor->data_scale(), dst_tensor->data_zero_point());
+ }
+ else
+ {
+ elementwiseQuantize<InputT, OutputT>(src_tensor, dst_tensor);
+ }
+}
+
+// Dequantize per element
+template <typename InputT, typename OutputT>
+void elementwiseDequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
+{
+ const auto scale = src_tensor->data_scale();
+ const auto zero_point = src_tensor->data_zero_point();
+
+ auto loop_shape = src_tensor->getShape();
+ const auto src_layout = src_tensor->layout();
+ const auto dst_layout = dst_tensor->layout();
+ const bool is_permutation = src_layout != dst_layout && loop_shape.rank() == 4;
+ ShapeLoop(loop_shape, [&](const onert::ir::Coordinates &coords) {
+ const InputT *input_data =
+ reinterpret_cast<const InputT *>(src_tensor->buffer() + src_tensor->calcOffset(coords));
+ const OutputT result = static_cast<OutputT>(scale * (*input_data - zero_point));
+
+ ir::Coordinates dst_coords =
+ is_permutation ? ir::convertCoordinates(coords, src_layout, dst_layout) : coords;
+ OutputT *output_data =
+ reinterpret_cast<OutputT *>(dst_tensor->buffer() + dst_tensor->calcOffset(dst_coords));
+ *output_data = result;
+ });
+}
+
+// TODO Optimize the case where tensors have the same layout
+template <typename InputT, typename OutputT>
+void dequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
+{
+ if (!src_tensor->has_padding() && !dst_tensor->has_padding() &&
+ src_tensor->layout() == dst_tensor->layout() && !src_tensor->is_dynamic())
+ {
+ assert(!dst_tensor->is_dynamic());
+
+ // Call optimized neon kernel
+ nnfw::cker::Dequantize(getShape(src_tensor),
+ reinterpret_cast<const InputT *>(src_tensor->buffer()),
+ getShape(dst_tensor), reinterpret_cast<OutputT *>(dst_tensor->buffer()),
+ src_tensor->data_scale(), src_tensor->data_zero_point());
+ }
+ else
+ {
+ elementwiseDequantize<InputT, OutputT>(src_tensor, dst_tensor);
+ }
+}
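
The element-wise paths above implement the usual affine quantization q = clamp(round(x / scale) + zero_point) and its inverse x = scale * (q - zero_point). A scalar round-trip sketch with illustrative values:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Affine quantization of one float to uint8 and back, matching the per-element
// formulas used by elementwiseQuantize/elementwiseDequantize above.
int main()
{
  const float scale = 0.5f;
  const int32_t zero_point = 128;
  const float x = 3.2f;

  int32_t unclamped = static_cast<int32_t>(std::round(x / scale)) + zero_point; // 134
  uint8_t q = static_cast<uint8_t>(std::min<int32_t>(std::max<int32_t>(unclamped, 0), 255));

  float back = scale * (static_cast<int32_t>(q) - zero_point); // 3.0f (quantization error 0.2)
  std::printf("q = %u, dequantized = %f\n", q, back);
  return 0;
}
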
+
+template <typename SRC_T, typename DST_T,
+ std::enable_if_t<std::is_base_of<backend::ITensor, SRC_T>::value &&
+ std::is_base_of<backend::ITensor, DST_T>::value,
+ bool> = true>
+void typeAwareQuantize(const SRC_T *src_tensor, DST_T *dst_tensor)
+{
+ // TODO Support other types
+ if (src_tensor->data_type() == ir::DataType::FLOAT32)
+ {
+ switch (dst_tensor->data_type())
+ {
+ case ir::DataType::QUANT_UINT8_ASYMM:
+ {
+ quantize<float, uint8_t>(src_tensor, dst_tensor);
+ break;
+ }
+ case ir::DataType::QUANT_INT8_SYMM:
+ {
+ quantize<float, int8_t>(src_tensor, dst_tensor);
+ break;
+ }
+ case ir::DataType::QUANT_INT16_SYMM:
+ {
+ quantize<float, int16_t>(src_tensor, dst_tensor);
+ break;
+ }
+ default:
+ {
+ throw std::runtime_error("IPermuteFunction: Unsupported quantization type");
+ break;
+ }
+ }
+ }
+ else if (dst_tensor->data_type() == ir::DataType::FLOAT32)
+ {
+ switch (src_tensor->data_type())
+ {
+ case ir::DataType::QUANT_UINT8_ASYMM:
+ {
+ dequantize<uint8_t, float>(src_tensor, dst_tensor);
+ break;
+ }
+ case ir::DataType::QUANT_INT8_SYMM:
+ {
+ dequantize<int8_t, float>(src_tensor, dst_tensor);
+ break;
+ }
+ case ir::DataType::QUANT_INT16_SYMM:
+ {
+ dequantize<int16_t, float>(src_tensor, dst_tensor);
+ break;
+ }
+ default:
+ {
+ throw std::runtime_error("IPermuteFunction: Unsupported dequantization type");
+ break;
+ }
+ }
+ }
+ else
+ {
+ throw std::runtime_error("IPermuteFunction: Unsupported type for type-aware quantization yet");
+ }
+}
+
+} // namespace
+
+namespace onert
+{
+namespace exec
+{
+
+void IPermuteFunction::run()
+{
+  // TODO Optimization: can we make control not reach here when (_src_tensors.size() == 0)?
+ assert(_src_tensors.size() == _dst_tensors.size());
+ if (_src_tensors_offsets.size() == 0)
+ {
+ _src_tensors_offsets.resize(_src_tensors.size());
+ _dst_tensors_offsets.resize(_dst_tensors.size());
+ }
+ assert(_src_tensors.size() == _src_tensors_offsets.size());
+ assert(_src_tensors_offsets.size() == _dst_tensors_offsets.size());
+
+ for (size_t i = 0; i < _src_tensors.size(); ++i)
+ {
+ auto src_tensor = _src_tensors.at(i);
+ auto dst_tensor = _dst_tensors.at(i);
+ auto &src_offsets = _src_tensors_offsets.at(i);
+ auto &dst_offsets = _dst_tensors_offsets.at(i);
+ if (src_tensor != dst_tensor)
+ {
+ const auto rank = src_tensor->getShape().rank();
+ permute(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+ }
+ }
+}
+
+void IPermuteFunction::permute(backend::ITensor *src_tensor, backend::ITensor *dst_tensor,
+ size_t rank, std::vector<size_t> &src_offsets,
+ std::vector<size_t> &dst_offsets)
+{
+ if (src_tensor->total_size() == 0)
+ {
+ assert(dst_tensor->total_size() == 0);
+ return;
+ }
+
+ assert(src_tensor != dst_tensor);
+ if (underlying_type(src_tensor->data_type()) != underlying_type(dst_tensor->data_type()))
+ {
+ typeAwareQuantize(src_tensor, dst_tensor);
+ return;
+ }
+
+ switch (src_tensor->data_type())
+ {
+ case ir::DataType::FLOAT32:
+ permute<float>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+ break;
+ case ir::DataType::INT32:
+ permute<int32_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+ break;
+ case ir::DataType::UINT32:
+ permute<uint32_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+ break;
+ case ir::DataType::BOOL8:
+ case ir::DataType::QUANT_UINT8_ASYMM:
+ case ir::DataType::UINT8:
+ permute<uint8_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+ break;
+ case ir::DataType::QUANT_INT8_ASYMM:
+ case ir::DataType::QUANT_INT8_SYMM:
+ permute<int8_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+ break;
+ case ir::DataType::INT64:
+ permute<int64_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+ break;
+ case ir::DataType::QUANT_INT16_SYMM:
+ permute<int16_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+ break;
+ default:
+ throw std::runtime_error("IPermuteFunction: Not supported data type");
+ break;
+ }
+}
+
+const std::type_info &IPermuteFunction::underlying_type(ir::DataType type) const
+{
+ switch (type)
+ {
+ case ir::DataType::FLOAT32:
+ return typeid(float);
+ case ir::DataType::INT32:
+ return typeid(int32_t);
+ case ir::DataType::UINT32:
+ return typeid(uint32_t);
+ case ir::DataType::INT64:
+ return typeid(int64_t);
+ case ir::DataType::BOOL8:
+ case ir::DataType::QUANT_UINT8_ASYMM:
+ case ir::DataType::UINT8:
+ return typeid(uint8_t);
+ case ir::DataType::QUANT_INT8_ASYMM:
+ case ir::DataType::QUANT_INT8_SYMM:
+ return typeid(int8_t);
+ case ir::DataType::QUANT_INT16_SYMM:
+ return typeid(int16_t);
+ default:
+ throw std::runtime_error("IPermuteFunction: Not supported data type");
+ }
+}
+
+} // namespace exec
+} // namespace onert
diff --git a/runtime/onert/core/src/exec/IPermuteFunction.h b/runtime/onert/core/src/exec/IPermuteFunction.h
index 6b4d15380..e790f3290 100644
--- a/runtime/onert/core/src/exec/IPermuteFunction.h
+++ b/runtime/onert/core/src/exec/IPermuteFunction.h
@@ -25,21 +25,48 @@
#include "backend/ITensor.h"
#include "exec/IFunction.h"
-#include "ir/Index.h"
-#include "ir/Shape.h"
#include <memory>
-#include <typeinfo>
-#include "util/Utils.h"
#include <vector>
+#include <unordered_map>
namespace onert
{
namespace exec
{
+inline void UpdateOffsets(::onert::backend::ITensor *src, ::onert::backend::ITensor *dst,
+ const ::onert::ir::Shape &loop_shape, std::vector<size_t> &src_offsets,
+ std::vector<size_t> &dst_offsets)
+{
+ ShapeLoop(loop_shape, [&](const onert::ir::Coordinates &coords) {
+ src_offsets.emplace_back(src->calcOffset(coords));
+ dst_offsets.emplace_back(dst->calcOffset(coords));
+ });
+}
+
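+// Copies copy_len bytes per pre-computed offset pair; used when shapes are static and offsets have been cached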
+inline void CopyStatic(const uint8_t *src_buffer, uint8_t *dst_buffer,
+ const std::vector<size_t> &src_offsets,
+ const std::vector<size_t> &dst_offsets, size_t copy_len)
+{
+ assert(src_offsets.size() == dst_offsets.size());
+ for (size_t i = 0; i < src_offsets.size(); ++i)
+ {
+ memcpy(dst_buffer + dst_offsets.at(i), src_buffer + src_offsets.at(i), copy_len);
+ }
+}
+
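+// Copies copy_len bytes per coordinate, computing offsets on the fly; used for dynamic tensors whose offsets cannot be cached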
+inline void CopyDynamic(const ::onert::backend::ITensor *src, const ::onert::backend::ITensor *dst,
+ uint8_t *dst_buffer, const ::onert::ir::Shape &loop_shape, size_t copy_len)
+{
+ ShapeLoop(loop_shape, [&](const onert::ir::Coordinates &coords) {
+ // Copy src tensor's data to dst_buffer with calculated offset of dst tensor
+ memcpy(dst_buffer + dst->calcOffset(coords), src->buffer() + src->calcOffset(coords), copy_len);
+ });
+}
+
class IPermuteFunction : public IFunction
{
-private:
+protected:
enum class PermuteType
{
NHWC_TO_NCHW,
@@ -48,63 +75,69 @@ private:
};
public:
- virtual void run() override
+ virtual void run() override;
+
+ virtual void prepare() override { optimize(); }
+
+ virtual void optimize() = 0;
+
+protected:
+ void permute(backend::ITensor *src_tensor, backend::ITensor *dst_tensor, size_t rank,
+ std::vector<size_t> &src_offsets, std::vector<size_t> &dst_offsets);
+
+private:
+ // TODO Make src const by providing const access()
+ template <class T>
+ void permute(backend::ITensor *src, backend::ITensor *dst, size_t rank,
+ std::vector<size_t> &src_offsets, std::vector<size_t> &dst_offsets)
{
- assert(_src_tensors.size() > 0);
- assert(_src_tensors.size() == _dst_tensors.size());
- auto src_it = _src_tensors.begin();
- auto dst_it = _dst_tensors.begin();
- while (src_it != _src_tensors.end())
+ assert(src->total_size() != 0 && dst->total_size() != 0);
+ // If dst is a subtensor, we have to use clEnqueueMapBuffer instead of clEnqueueWriteBuffer
+ if (dst->needMemoryMap() && !dst->is_subtensor())
{
- const auto src_tensor = *src_it;
- auto dst_tensor = *dst_it;
- if (src_tensor != dst_tensor)
+ // An assertion to check mapping without calling map()
+ // Currently there is no case where both src and dst have a CL buffer.
+ assert(!src->needMemoryMap());
+
+ if (!src->has_padding() && !dst->has_padding() && src->layout() == dst->layout())
{
- // TODO Change to permute in parallel
- assert(underlying_type(src_tensor->data_type()) ==
- underlying_type(dst_tensor->data_type()));
- const auto rank = src_tensor->num_dimensions();
- switch (src_tensor->data_type())
- {
- case ir::DataType::FLOAT32:
- permute<float>(src_tensor, dst_tensor, rank);
- break;
- case ir::DataType::INT32:
- permute<int32_t>(src_tensor, dst_tensor, rank);
- break;
- case ir::DataType::UINT32:
- permute<uint32_t>(src_tensor, dst_tensor, rank);
- break;
- case ir::DataType::BOOL8:
- case ir::DataType::QUANT_UINT8_ASYMM:
- case ir::DataType::UINT8:
- permute<uint8_t>(src_tensor, dst_tensor, rank);
- break;
- case ir::DataType::QUANT_INT8_SYMM:
- permute<int8_t>(src_tensor, dst_tensor, rank);
- break;
- case ir::DataType::INT64:
- permute<int64_t>(src_tensor, dst_tensor, rank);
- break;
- default:
- throw std::runtime_error("IPermuteFunction: Not supported data type");
- break;
- }
+ src->access([&](backend::ITensor &) { dst->enqueueWriteBuffer(src->buffer(), false); });
}
- src_it++;
- dst_it++;
+ else
+ {
+ // TODO Optimize this block for the case where the padding size of dst is large.
+ _buffers_map[dst].reserve(dst->total_size());
+ auto dst_buffer = _buffers_map[dst].data();
+ src->access([&](backend::ITensor &) {
+ permute<T>(src, dst, rank, dst_buffer, dst->total_size(), src_offsets, dst_offsets);
+ });
+ dst->enqueueWriteBuffer(dst_buffer, false);
+ }
+ }
+ else if (src->needMemoryMap() && !src->is_subtensor() && !src->has_padding() &&
+ !dst->has_padding() && src->layout() == dst->layout())
+ {
+ assert(!dst->needMemoryMap());
+ dst->access([&](backend::ITensor &) { src->enqueueReadBuffer(dst->buffer(), true); });
+ }
+ else
+ {
+ auto fn = [&](backend::ITensor &) {
+ dst->access([&](backend::ITensor &) {
+ permute<T>(src, dst, rank, dst->buffer(), dst->total_size(), src_offsets, dst_offsets);
+ });
+ };
+ src->access(fn);
}
}
- virtual void prepare() override { optimize(); }
-
- virtual void optimize() = 0;
-
-private:
template <class T>
- void permute(const std::shared_ptr<backend::ITensor> &src, std::shared_ptr<backend::ITensor> &dst,
- size_t rank)
+ void permute(backend::ITensor *src, backend::ITensor *dst, size_t rank, uint8_t *dst_buffer,
+ size_t dst_size, std::vector<size_t> &src_offsets, std::vector<size_t> &dst_offsets)
{
+ assert(dst_buffer != nullptr);
+ assert(dst_size == dst->total_size());
+
const auto permute_type = [&]() -> PermuteType {
if (src->layout() == ir::Layout::NHWC && dst->layout() == ir::Layout::NCHW)
{
@@ -119,166 +152,115 @@ private:
return PermuteType::COPY;
}
}();
- auto fn = [&](backend::ITensor &src_tensor) {
- dst->access([&](backend::ITensor &dst_tensor) {
- auto src_buffer = src_tensor.buffer();
- auto src_size = src_tensor.total_size();
- auto dst_buffer = dst_tensor.buffer();
- if (permute_type == PermuteType::COPY)
+ if (rank == 4 && permute_type != PermuteType::COPY)
+ {
+ switch (permute_type)
+ {
+ case PermuteType::NHWC_TO_NCHW:
{
- assert(src_tensor.layout() == dst_tensor.layout());
- if (!src_tensor.has_padding() && !dst_tensor.has_padding())
- {
- assert(src_size <= dst_tensor.total_size());
- memcpy(dst_buffer, src_buffer, src_size);
- return;
- }
+ ir::FeatureShape shape;
+ auto dst_shape = dst->getShape();
+ shape.N = dst_shape.dim(0);
+ shape.C = dst_shape.dim(1);
+ shape.H = dst_shape.dim(2);
+ shape.W = dst_shape.dim(3);
+
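+ // Derive strides from calcOffset() deltas so dst padding is honored; size-1 dimensions get stride 0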
+ typename feature::nchw::View<T>::Strides strides;
+ const auto start_offset = dst->calcOffset({0, 0, 0, 0});
+ strides.W = dst_shape.dim(3) == 1 ? 0 : dst->calcOffset({0, 0, 0, 1}) - start_offset;
+ strides.H = dst_shape.dim(2) == 1 ? 0 : dst->calcOffset({0, 0, 1, 0}) - start_offset;
+ strides.C = dst_shape.dim(1) == 1 ? 0 : dst->calcOffset({0, 1, 0, 0}) - start_offset;
+ strides.N = dst_shape.dim(0) == 1 ? 0 : dst->calcOffset({1, 0, 0, 0}) - start_offset;
+
+ const feature::nhwc::Reader<T> from(src);
+ feature::nchw::View<T> into(shape, strides,
+ reinterpret_cast<T *>(dst_buffer + start_offset), dst_size);
+ feature::iterate(shape) << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+ const auto value = from.at(batch, row, col, ch);
+ into.at(batch, ch, row, col) = value;
+ };
+ break;
}
- switch (rank)
+ case PermuteType::NCHW_TO_NHWC:
{
- case 0:
- case 1:
- {
- const int32_t copy_len = dst_tensor.dimension(0);
+ ir::FeatureShape shape;
+ auto dst_shape = dst->getShape();
+ shape.N = dst_shape.dim(0);
+ shape.H = dst_shape.dim(1);
+ shape.W = dst_shape.dim(2);
+ shape.C = dst_shape.dim(3);
- memcpy(dst_buffer, src_buffer, copy_len * sizeof(T));
- break;
- }
- case 2:
- {
- const int32_t dim_0 = dst_tensor.dimension(0);
- const int32_t copy_len = dst_tensor.dimension(1);
+ typename feature::nhwc::View<T>::Strides strides;
+ const auto start_offset = dst->calcOffset({0, 0, 0, 0});
+ strides.C = dst_shape.dim(3) == 1 ? 0 : dst->calcOffset({0, 0, 0, 1}) - start_offset;
+ strides.W = dst_shape.dim(2) == 1 ? 0 : dst->calcOffset({0, 0, 1, 0}) - start_offset;
+ strides.H = dst_shape.dim(1) == 1 ? 0 : dst->calcOffset({0, 1, 0, 0}) - start_offset;
+ strides.N = dst_shape.dim(0) == 1 ? 0 : dst->calcOffset({1, 0, 0, 0}) - start_offset;
- for (int32_t i = 0; i < dim_0; ++i)
- {
- ir::Coordinates coords{i, 0};
- memcpy(dst_buffer + dst_tensor.calcOffset(coords),
- src_buffer + src_tensor.calcOffset(coords), copy_len * sizeof(T));
- }
- break;
- }
- case 3:
- {
- const int32_t dim_0 = dst_tensor.dimension(0);
- const int32_t dim_1 = dst_tensor.dimension(1);
- const int32_t copy_len = dst_tensor.dimension(2);
+ const feature::nchw::Reader<T> from(src);
+ feature::nhwc::View<T> into(shape, strides,
+ reinterpret_cast<T *>(dst_buffer + start_offset), dst_size);
+ feature::iterate(shape) << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+ const auto value = from.at(batch, ch, row, col);
+ into.at(batch, row, col, ch) = value;
+ };
+ break;
+ }
+ default:
+ {
+ throw std::runtime_error("Unsupported Permutation");
+ break;
+ }
+ }
+ }
+ else if (!src->has_padding() && !dst->has_padding())
+ {
+ auto src_size = src->total_size();
+ assert(src_size <= dst->total_size());
+ memcpy(dst_buffer, src->buffer(), src_size);
+ }
+ else
+ {
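+ // Fallback: copy row by row along the innermost axis, collapsing that axis in the loop shape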
+ auto loop_shape = src->getShape();
+ const auto copy_axis = loop_shape.rank() - 1;
+ const auto copy_len = loop_shape.dim(copy_axis) * sizeof(T);
+ loop_shape.dim(copy_axis) = 1;
- for (auto i = 0; i < dim_0; ++i)
- {
- for (auto j = 0; j < dim_1; ++j)
- {
- ir::Coordinates coords{i, j, 0};
- memcpy(dst_buffer + dst_tensor.calcOffset(coords),
- src_buffer + src_tensor.calcOffset(coords), copy_len * sizeof(T));
- }
- }
- break;
- }
- case 4:
- {
- switch (permute_type)
- {
- case PermuteType::NHWC_TO_NCHW:
- {
- ir::FeatureShape shape;
- shape.N = dst_tensor.dimension(0);
- shape.C = dst_tensor.dimension(1);
- shape.H = dst_tensor.dimension(2);
- shape.W = dst_tensor.dimension(3);
- const feature::nhwc::Reader<T> from(&src_tensor);
- feature::nchw::View<T> into(&dst_tensor);
- feature::iterate(shape)
- << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
- const auto value = from.at(batch, row, col, ch);
- into.at(batch, ch, row, col) = value;
- };
- break;
- }
- case PermuteType::NCHW_TO_NHWC:
- {
- ir::FeatureShape shape;
- shape.N = src_tensor.dimension(0);
- shape.C = src_tensor.dimension(1);
- shape.H = src_tensor.dimension(2);
- shape.W = src_tensor.dimension(3);
- const feature::nchw::Reader<T> from(&src_tensor);
- feature::nhwc::View<T> into(&dst_tensor);
- feature::iterate(shape)
- << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
- const auto value = from.at(batch, ch, row, col);
- into.at(batch, row, col, ch) = value;
- };
- break;
- }
- case PermuteType::COPY:
- {
- const int32_t dim_0 = dst_tensor.dimension(0);
- const int32_t dim_1 = dst_tensor.dimension(1);
- const int32_t dim_2 = dst_tensor.dimension(2);
- const int32_t copy_len = dst_tensor.dimension(3);
+ if (src->is_dynamic())
+ {
+ assert(dst->is_dynamic());
+ CopyDynamic(src, dst, dst_buffer, loop_shape, copy_len);
+ }
+ else
+ {
+ // TODO Uncomment the assertion below
+ // assert(!dst->is_dynamic() || dst is output of graph);
+ if (src_offsets.size() == 0)
+ {
+ assert(dst_offsets.size() == 0);
- for (auto i = 0; i < dim_0; ++i)
- {
- for (auto j = 0; j < dim_1; ++j)
- {
- for (auto k = 0; k < dim_2; ++k)
- {
- ir::Coordinates coords{i, j, k, 0};
- memcpy(dst_buffer + dst_tensor.calcOffset(coords),
- src_buffer + src_tensor.calcOffset(coords), copy_len * sizeof(T));
- }
- }
- }
- break;
- }
- default:
- {
- throw std::runtime_error("Unsupported Permutation");
- break;
- }
- }
- break;
- }
- default:
- throw std::runtime_error("Unsupported rank in permutation");
- break;
+ auto loop_shape = src->getShape();
+ const auto copy_axis = loop_shape.rank() - 1;
+ loop_shape.dim(copy_axis) = 1;
+ UpdateOffsets(src, dst, loop_shape, src_offsets, dst_offsets);
}
- });
- };
- src->access(fn);
+ CopyStatic(src->buffer(), dst_buffer, src_offsets, dst_offsets, copy_len);
+ }
+ }
}
+protected:
// NOTE The typeid expression is an lvalue expression which refers to an object with static storage
// duration, of the polymorphic type const std::type_info or of some type derived from it.
// So std::type_info is non-copyable
- const std::type_info &underlying_type(ir::DataType type) const
- {
- switch (type)
- {
- case ir::DataType::FLOAT32:
- return typeid(float);
- case ir::DataType::INT32:
- return typeid(int32_t);
- case ir::DataType::UINT32:
- return typeid(uint32_t);
- case ir::DataType::INT64:
- return typeid(int64_t);
- case ir::DataType::BOOL8:
- case ir::DataType::QUANT_UINT8_ASYMM:
- case ir::DataType::UINT8:
- return typeid(uint8_t);
- case ir::DataType::QUANT_INT8_SYMM:
- return typeid(int8_t);
- default:
- throw std::runtime_error("IPermuteFunction: Not supported data type");
- }
- }
+ const std::type_info &underlying_type(ir::DataType type) const;
protected:
- std::vector<std::shared_ptr<backend::ITensor>> _src_tensors;
- std::vector<std::shared_ptr<backend::ITensor>> _dst_tensors;
- // TODO Remove this member if it is possible
- std::vector<size_t> _ranks;
+ std::vector<backend::ITensor *> _src_tensors;
+ std::vector<backend::ITensor *> _dst_tensors;
+ std::vector<std::vector<size_t>> _src_tensors_offsets;
+ std::vector<std::vector<size_t>> _dst_tensors_offsets;
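+ // Host-side staging buffers keyed by dst tensor, used when dst requires memory mapping (e.g. CL buffers)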
+ std::unordered_map<const backend::ITensor *, std::vector<uint8_t>> _buffers_map;
};
} // namespace exec
diff --git a/runtime/onert/core/src/exec/IPermuteFunction.test.cc b/runtime/onert/core/src/exec/IPermuteFunction.test.cc
new file mode 100644
index 000000000..1009f194d
--- /dev/null
+++ b/runtime/onert/core/src/exec/IPermuteFunction.test.cc
@@ -0,0 +1,902 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IPermuteFunction.h"
+
+#include <ir/Layout.h>
+#include <ir/Shape.h>
+#include <ir/TypeInfo.h>
+
+#include <cmath>
+#include <gtest/gtest.h>
+
+namespace
+{
+using namespace onert;
+using namespace ir;
+using namespace backend;
+using namespace exec;
+
+class MockUpTensor : public ITensor
+{
+public:
+ MockUpTensor(const Shape &shape, const TypeInfo &type_info, Layout layout, size_t pad)
+ : _shape(shape), _type_info(type_info), _data(nullptr), _layout(layout)
+ {
+ _strides.resize(shape.rank());
+
+ std::vector<size_t> pads(shape.rank(), 0);
+ pads[shape.rank() - 1] = pad;
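+ // Row-major strides in elements; the requested pad is applied to the innermost dimension only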
+ size_t stride = 1;
+ for (int32_t i = _shape.rank() - 1; i >= 0; --i)
+ {
+ _strides.at(i) = stride;
+ stride = stride * (_shape.dim(i) + pads.at(i));
+ }
+ }
+ virtual ~MockUpTensor() {}
+
+ void setBuffer(uint8_t *data) { _data = data; }
+
+ size_t total_size() const override
+ {
+ size_t total_size = _strides[0] * _shape.dim(0);
+ total_size *= sizeOfDataType(data_type());
+ return total_size;
+ }
+
+ size_t calcOffset(const ir::Coordinates &coords) const override
+ {
+ size_t offset = 0;
+ for (size_t i = 0; i < _shape.rank(); ++i)
+ {
+ offset += (_strides[i] * coords[i]);
+ }
+ offset *= sizeOfDataType(data_type());
+ return offset;
+ }
+
+ uint8_t *buffer() const override { return _data; }
+
+ ir::Layout layout() const override { return _layout; }
+ ir::DataType data_type() const override { return _type_info.type(); }
+ float data_scale() const override { return _type_info.scale(); }
+ int32_t data_zero_point() const override { return _type_info.zero_point(); }
+ const std::vector<float> &data_scales() const override { return _type_info.scales(); }
+ const std::vector<int32_t> &data_zero_points() const override { return _type_info.zero_points(); }
+ bool has_padding() const override
+ {
+ return total_size() / sizeOfDataType(data_type()) != _shape.num_elements();
+ }
+ void access(const std::function<void(ITensor &tensor)> &fn) final { fn(*this); }
+
+ bool is_dynamic() const override { return false; }
+ Shape getShape() const override { return _shape; }
+
+private:
+ Shape _shape;
+ TypeInfo _type_info;
+ Layout _layout;
+ uint8_t *_data;
+ std::vector<size_t> _strides;
+};
+
+class MockUpLayer : public IPermuteFunction
+{
+public:
+ MockUpLayer(const std::vector<ITensor *> &inputs, const std::vector<ITensor *> &outputs)
+ {
+ assert(inputs.size() == outputs.size());
+ _src_tensors = inputs;
+ _dst_tensors = outputs;
+ }
+ virtual ~MockUpLayer() {}
+ void optimize() override {}
+};
+
+TEST(IPermuteFunction, float_rank1)
+{
+ const size_t input_pads[4] = {0, 1, 0, 2};
+ const size_t output_pads[4] = {0, 0, 2, 1};
+ const std::vector<Shape> shapes{{1}, {4}, {5}, {2}};
+ float expected_buffer[] = {1, 0, -1, -2, 3};
+ const auto type_info = TypeInfo(DataType::FLOAT32);
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+ outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ Coordinates coords{j};
+ float result =
+ *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+ float expected =
+ *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+ EXPECT_EQ(result, expected);
+ }
+ }
+}
+
+TEST(IPermuteFunction, float_rank2)
+{
+ const size_t input_pads[4] = {0, 1, 0, 2};
+ const size_t output_pads[4] = {0, 0, 2, 1};
+ const std::vector<Shape> shapes{{1, 4}, {2, 2}, {1, 5}, {2, 3}};
+ float expected_buffer[] = {1, 0, -1, -2, 3, -4, 5, -6, 7, -8};
+ const auto type_info = TypeInfo(DataType::FLOAT32);
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+ outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ Coordinates coords{j, k};
+ float result =
+ *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+ float expected =
+ *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+}
+
+TEST(IPermuteFunction, float_rank3)
+{
+ const size_t input_pads[4] = {0, 5, 0, 2};
+ const size_t output_pads[4] = {0, 3, 2, 1};
+ const std::vector<Shape> shapes{{1, 4, 1}, {1, 2, 1}, {2, 1, 5}, {1, 2, 3}};
+ float expected_buffer[] = {1, 0, -1, -2, 3, -4, 5, -6, 7, -8, 9, -10};
+ const auto type_info = TypeInfo(DataType::FLOAT32);
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+ outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+ {
+ Coordinates coords{j, k, l};
+ float result =
+ *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+ float expected =
+ *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+ }
+}
+
+TEST(IPermuteFunction, float_rank4)
+{
+ const size_t input_pads[4] = {0, 0, 1, 2};
+ const size_t output_pads[4] = {0, 3, 2, 1};
+ const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+ float expected_buffer[] = {1, 0, -1, -2, 3, -4, 5, -6, 7, -8, 9, -10};
+ const auto type_info = TypeInfo(DataType::FLOAT32);
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+ outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+ {
+ for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+ {
+ Coordinates coords{j, k, l, m};
+ float result =
+ *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+ float expected =
+ *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST(IPermuteFunction, float_rank4_layout)
+{
+ const size_t input_pads[4] = {0, 0, 1, 2};
+ const size_t output_pads[4] = {0, 3, 2, 1};
+ const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+ float expected_buffer[] = {1, 0, -1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16};
+ const auto type_info = TypeInfo(DataType::FLOAT32);
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ Layout layout = Layout::NHWC;
+ Shape shape = shapes[i];
+ if (i % 2 == 1)
+ {
+ layout = Layout::NCHW;
+ shape = Shape{shapes[i].dim(0), shapes[i].dim(3), shapes[i].dim(1), shapes[i].dim(2)};
+ }
+ inputs[i] = std::make_unique<MockUpTensor>(shape, type_info, layout, input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+ if (layout == Layout::NHWC)
+ {
+ layout = Layout::NCHW;
+ shape = Shape{shapes[i].dim(0), shapes[i].dim(3), shapes[i].dim(1), shapes[i].dim(2)};
+ }
+ else
+ {
+ layout = Layout::NHWC;
+ shape = shapes[i];
+ }
+ outputs[i] = std::make_unique<MockUpTensor>(shape, type_info, layout, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+ {
+ for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+ {
+ Coordinates input_coords;
+ Coordinates output_coords;
+ if (inputs[i]->layout() == Layout::NHWC)
+ {
+ input_coords = Coordinates{j, k, l, m};
+ }
+ else
+ {
+ input_coords = Coordinates{j, m, k, l};
+ }
+ if (outputs[i]->layout() == Layout::NHWC)
+ {
+ output_coords = Coordinates{j, k, l, m};
+ }
+ else
+ {
+ output_coords = Coordinates{j, m, k, l};
+ }
+ float result = *reinterpret_cast<float *>(outputs[i]->buffer() +
+ outputs[i]->calcOffset(output_coords));
+ float expected =
+ *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(input_coords));
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST(IPermuteFunction, float_to_qasymm8)
+{
+ const size_t input_pads[4] = {0, 0, 1, 2};
+ const size_t output_pads[4] = {0, 3, 2, 1};
+ const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+ float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100};
+ float scale = 10;
+ int32_t zero_point = 128;
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ inputs[i] = std::make_unique<MockUpTensor>(shapes[i], TypeInfo(DataType::FLOAT32), Layout::NHWC,
+ input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+ TypeInfo type_info{DataType::QUANT_UINT8_ASYMM, scale, zero_point};
+ outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+ {
+ for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+ {
+ Coordinates coords{j, k, l, m};
+ uint8_t qasymm8 =
+ *reinterpret_cast<uint8_t *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+ float result = (qasymm8 - zero_point) * scale;
+ float expected =
+ *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST(IPermuteFunction, float_to_qsymm8)
+{
+ const size_t input_pads[4] = {0, 0, 1, 2};
+ const size_t output_pads[4] = {0, 3, 2, 1};
+ const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+ float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100};
+ float scale = 10;
+ int32_t zero_point = 0;
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ inputs[i] = std::make_unique<MockUpTensor>(shapes[i], TypeInfo(DataType::FLOAT32), Layout::NHWC,
+ input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+ TypeInfo type_info{DataType::QUANT_INT8_SYMM, scale, zero_point};
+ outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+ {
+ for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+ {
+ Coordinates coords{j, k, l, m};
+ int8_t qsymm8 =
+ *reinterpret_cast<int8_t *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+ float result = (qsymm8 - zero_point) * scale;
+ float expected =
+ *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST(IPermuteFunction, float_to_qsymm16)
+{
+ const size_t input_pads[4] = {0, 0, 1, 2};
+ const size_t output_pads[4] = {0, 3, 2, 1};
+ const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+ float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100};
+ float scale = 10;
+ int32_t zero_point = 0;
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ inputs[i] = std::make_unique<MockUpTensor>(shapes[i], TypeInfo(DataType::FLOAT32), Layout::NHWC,
+ input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+ TypeInfo type_info{DataType::QUANT_INT16_SYMM, scale, zero_point};
+ outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+ {
+ for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+ {
+ Coordinates coords{j, k, l, m};
+ int16_t qsymm16 =
+ *reinterpret_cast<int16_t *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+ float result = (qsymm16 - zero_point) * scale;
+ float expected =
+ *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST(IPermuteFunction, qasymm8_to_float)
+{
+ const size_t input_pads[4] = {0, 0, 1, 2};
+ const size_t output_pads[4] = {0, 3, 2, 1};
+ const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+ float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100};
+ float scale = 10;
+ int32_t zero_point = 128;
+ uint8_t input_buffer[12];
+
+ int32_t min_val = std::numeric_limits<uint8_t>::min();
+ int32_t max_val = std::numeric_limits<uint8_t>::max();
+ for (int32_t i = 0; i < sizeof(expected_buffer) / sizeof(float); ++i)
+ {
+ int32_t unclamped = static_cast<int32_t>(std::round(expected_buffer[i] / scale)) + zero_point;
+ input_buffer[i] = std::min(std::max(unclamped, min_val), max_val);
+ }
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ TypeInfo type_info{DataType::QUANT_UINT8_ASYMM, scale, zero_point};
+ inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(input_buffer));
+
+ outputs[i] = std::make_unique<MockUpTensor>(shapes[i], TypeInfo(DataType::FLOAT32),
+ Layout::NHWC, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+ {
+ for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+ {
+ Coordinates coords{j, k, l, m};
+ float result =
+ *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+ uint8_t qasymm8 =
+ *reinterpret_cast<uint8_t *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+ float expected = (qasymm8 - zero_point) * scale;
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST(IPermuteFunction, qsymm8_to_float)
+{
+ const size_t input_pads[4] = {0, 0, 1, 2};
+ const size_t output_pads[4] = {0, 3, 2, 1};
+ const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+ float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100};
+ float scale = 10;
+ int32_t zero_point = 0;
+ uint8_t input_buffer[12];
+
+ int32_t min_val = std::numeric_limits<int8_t>::min();
+ int32_t max_val = std::numeric_limits<int8_t>::max();
+ for (int32_t i = 0; i < sizeof(expected_buffer) / sizeof(float); ++i)
+ {
+ int32_t unclamped = static_cast<int32_t>(std::round(expected_buffer[i] / scale)) + zero_point;
+ input_buffer[i] = std::min(std::max(unclamped, min_val), max_val);
+ }
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ TypeInfo type_info{DataType::QUANT_INT8_SYMM, scale, zero_point};
+ inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(input_buffer));
+
+ outputs[i] = std::make_unique<MockUpTensor>(shapes[i], TypeInfo(DataType::FLOAT32),
+ Layout::NHWC, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+ {
+ for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+ {
+ Coordinates coords{j, k, l, m};
+ float result =
+ *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+ int8_t qsymm8 =
+ *reinterpret_cast<int8_t *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+ float expected = (qsymm8 - zero_point) * scale;
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST(IPermuteFunction, qsymm16_to_float)
+{
+ const size_t input_pads[4] = {0, 0, 1, 2};
+ const size_t output_pads[4] = {0, 3, 2, 1};
+ const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+ float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100};
+ float scale = 10;
+ int32_t zero_point = 0;
+ uint8_t input_buffer[12];
+
+ int32_t min_val = std::numeric_limits<int16_t>::min();
+ int32_t max_val = std::numeric_limits<int16_t>::max();
+ for (int32_t i = 0; i < sizeof(expected_buffer) / sizeof(float); ++i)
+ {
+ int32_t unclamped = static_cast<int32_t>(std::round(expected_buffer[i] / scale)) + zero_point;
+ input_buffer[i] = std::min(std::max(unclamped, min_val), max_val);
+ }
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ TypeInfo type_info{DataType::QUANT_INT16_SYMM, scale, zero_point};
+ inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(input_buffer));
+
+ outputs[i] = std::make_unique<MockUpTensor>(shapes[i], TypeInfo(DataType::FLOAT32),
+ Layout::NHWC, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+ {
+ for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+ {
+ Coordinates coords{j, k, l, m};
+ float result =
+ *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+ int16_t qsymm16 =
+ *reinterpret_cast<int16_t *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+ float expected = (qsymm16 - zero_point) * scale;
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST(IPermuteFunction, float_to_qasymm8_layout)
+{
+ const size_t input_pads[4] = {0, 0, 1, 2};
+ const size_t output_pads[4] = {0, 3, 2, 1};
+ const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+ float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70,
+ -80, 90, -100, 110, -120, 130, -140, 150, -160};
+ float scale = 10;
+ int32_t zero_point = 128;
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ Layout layout = Layout::NHWC;
+ Shape shape = shapes[i];
+ if (i % 2 == 1)
+ {
+ layout = Layout::NCHW;
+ shape = Shape{shapes[i].dim(0), shapes[i].dim(3), shapes[i].dim(1), shapes[i].dim(2)};
+ }
+ inputs[i] =
+ std::make_unique<MockUpTensor>(shape, TypeInfo(DataType::FLOAT32), layout, input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+ if (layout == Layout::NHWC)
+ {
+ layout = Layout::NCHW;
+ shape = Shape{shapes[i].dim(0), shapes[i].dim(3), shapes[i].dim(1), shapes[i].dim(2)};
+ }
+ else
+ {
+ layout = Layout::NHWC;
+ shape = shapes[i];
+ }
+ TypeInfo type_info{DataType::QUANT_UINT8_ASYMM, scale, zero_point};
+ outputs[i] = std::make_unique<MockUpTensor>(shape, type_info, layout, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+ {
+ for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+ {
+ Coordinates input_coords;
+ Coordinates output_coords;
+ if (inputs[i]->layout() == Layout::NHWC)
+ {
+ input_coords = Coordinates{j, k, l, m};
+ }
+ else
+ {
+ input_coords = Coordinates{j, m, k, l};
+ }
+ if (outputs[i]->layout() == Layout::NHWC)
+ {
+ output_coords = Coordinates{j, k, l, m};
+ }
+ else
+ {
+ output_coords = Coordinates{j, m, k, l};
+ }
+ uint8_t qasymm8 = *reinterpret_cast<uint8_t *>(outputs[i]->buffer() +
+ outputs[i]->calcOffset(output_coords));
+ float result = (qasymm8 - zero_point) * scale;
+ float expected =
+ *reinterpret_cast<float *>(inputs[i]->buffer() + inputs[i]->calcOffset(input_coords));
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST(IPermuteFunction, asymm8_to_float_layout)
+{
+ const size_t input_pads[4] = {0, 0, 1, 2};
+ const size_t output_pads[4] = {0, 3, 2, 1};
+ const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+ float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70,
+ -80, 90, -100, 110, -120, 130, -140, 150, -160};
+ float scale = 10;
+ int32_t zero_point = 128;
+ uint8_t input_buffer[18];
+
+ int32_t min_val = std::numeric_limits<uint8_t>::min();
+ int32_t max_val = std::numeric_limits<uint8_t>::max();
+ for (int32_t i = 0; i < sizeof(expected_buffer) / sizeof(float); ++i)
+ {
+ int32_t unclamped = static_cast<int32_t>(std::round(expected_buffer[i] / scale)) + zero_point;
+ input_buffer[i] = std::min(std::max(unclamped, min_val), max_val);
+ }
+
+ std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+ std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+ std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+ for (size_t i = 0; i < 4; ++i)
+ {
+ Layout layout = Layout::NHWC;
+ Shape shape = shapes[i];
+ if (i % 2 == 1)
+ {
+ layout = Layout::NCHW;
+ shape = Shape{shapes[i].dim(0), shapes[i].dim(3), shapes[i].dim(1), shapes[i].dim(2)};
+ }
+ TypeInfo type_info{DataType::QUANT_UINT8_ASYMM, scale, zero_point};
+ inputs[i] = std::make_unique<MockUpTensor>(shape, type_info, layout, input_pads[i]);
+ inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(input_buffer));
+
+ if (layout == Layout::NHWC)
+ {
+ layout = Layout::NCHW;
+ shape = Shape{shapes[i].dim(0), shapes[i].dim(3), shapes[i].dim(1), shapes[i].dim(2)};
+ }
+ else
+ {
+ layout = Layout::NHWC;
+ shape = shapes[i];
+ }
+ outputs[i] =
+ std::make_unique<MockUpTensor>(shape, TypeInfo(DataType::FLOAT32), layout, output_pads[i]);
+ output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+ outputs[i]->setBuffer(output_buffers[i].get());
+ }
+
+ auto mockup_layer = std::make_unique<MockUpLayer>(
+ std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+ std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+ mockup_layer->run();
+
+ for (size_t i = 0; i < 4; ++i)
+ {
+ for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+ {
+ for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+ {
+ for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+ {
+ for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+ {
+ Coordinates input_coords;
+ Coordinates output_coords;
+ if (inputs[i]->layout() == Layout::NHWC)
+ {
+ input_coords = Coordinates{j, k, l, m};
+ }
+ else
+ {
+ input_coords = Coordinates{j, m, k, l};
+ }
+ if (outputs[i]->layout() == Layout::NHWC)
+ {
+ output_coords = Coordinates{j, k, l, m};
+ }
+ else
+ {
+ output_coords = Coordinates{j, m, k, l};
+ }
+ float result = *reinterpret_cast<float *>(outputs[i]->buffer() +
+ outputs[i]->calcOffset(output_coords));
+ uint8_t qasymm8 = *reinterpret_cast<uint8_t *>(inputs[i]->buffer() +
+ inputs[i]->calcOffset(input_coords));
+ float expected = (qasymm8 - zero_point) * scale;
+ EXPECT_EQ(result, expected);
+ }
+ }
+ }
+ }
+ }
+}
+
+} // namespace
diff --git a/runtime/onert/core/src/exec/JSONExecTime.cc b/runtime/onert/core/src/exec/JSONExecTime.cc
index 72a18def1..d149345fd 100644
--- a/runtime/onert/core/src/exec/JSONExecTime.cc
+++ b/runtime/onert/core/src/exec/JSONExecTime.cc
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "exec/JSONExecTime.h"
-#include "backend/IConfig.h"
+#include "JSONExecTime.h"
+
#include <fstream>
namespace onert
@@ -135,7 +135,7 @@ void JSON::printOperation(const std::map<uint32_t, int64_t> &operation_info,
stream.seekp(-2, std::ofstream::end);
}
-void JSON::uploadOperationsExecTime() const
+void JSON::storeOperationsExecTime() const
{
std::ofstream stream(_measurement_file);
if (!stream.is_open())
diff --git a/runtime/onert/core/src/exec/JSONExecTime.h b/runtime/onert/core/src/exec/JSONExecTime.h
index a64cb3133..e01723611 100644
--- a/runtime/onert/core/src/exec/JSONExecTime.h
+++ b/runtime/onert/core/src/exec/JSONExecTime.h
@@ -37,15 +37,15 @@ namespace exec
* _measurements[Backend*]["string"][bool][uint32_t] = int64_t
*/
using MeasurementData = std::unordered_map<
- const backend::Backend *,
- std::unordered_map<std::string, std::unordered_map<bool, std::map<uint32_t, int64_t>>>>;
+ const backend::Backend *,
+ std::unordered_map<std::string, std::unordered_map<bool, std::map<uint32_t, int64_t>>>>;
class JSON
{
public:
explicit JSON(const std::vector<const backend::Backend *> &backends,
MeasurementData &measurements)
- : _measurement_file("exec_time.json"), _backends(), _measurements(measurements)
+ : _measurement_file("exec_time.json"), _backends(), _measurements(measurements)
{
for (const auto b : backends)
{
@@ -54,18 +54,16 @@ public:
loadOperationsExecTime();
};
/**
- * @brief Update _operations_exec_time_file with new data.
+ * @brief Update _measurement_file with new data.
*/
- void uploadOperationsExecTime() const;
+ void storeOperationsExecTime() const;
private:
///@brief file containing measurements
std::string _measurement_file;
std::unordered_map<std::string, const backend::Backend *> _backends;
- std::unordered_map<
- const backend::Backend *,
- std::unordered_map<std::string, std::unordered_map<bool, std::map<uint32_t, int64_t>>>>
- &_measurements;
+ MeasurementData &_measurements;
+
/**
* @brief Helper function for inserting data to OperationExecTimes
*
@@ -86,7 +84,7 @@ private:
void printOperation(const std::map<uint32_t, int64_t> &operation_info,
std::ofstream &stream) const;
/**
- * @brief Parse and load operations_exec_time from _operations_exec_time_file.
+ * @brief Parse and load _measurements from _measurement_file.
*/
void loadOperationsExecTime();
};
diff --git a/runtime/onert/core/src/exec/LinearExecutor.cc b/runtime/onert/core/src/exec/LinearExecutor.cc
index 69dfe9b9b..a64dadcb1 100644
--- a/runtime/onert/core/src/exec/LinearExecutor.cc
+++ b/runtime/onert/core/src/exec/LinearExecutor.cc
@@ -24,41 +24,54 @@ namespace onert
namespace exec
{
-#ifdef RUY_PROFILER
-namespace
-{
-char *seq_to_label(const onert::ir::OpSequence *op_seq, const onert::ir::Operations &operations)
+void LinearExecutor::executeImpl()
{
- auto node_name = operations.at(*op_seq->begin()).name();
- char *cstr = new char[node_name.length() + 1];
- std::strcpy(cstr, node_name.c_str());
- return cstr;
-}
-} // namespace
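+ // With a tracing context, emit subgraph and job begin/end notifications around each code entry; otherwise run the same loop without notifications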
+ if (_tracing_ctx)
+ {
+ auto profiling_subg_index = _tracing_ctx->getSubgraphIndex(&_graph);
+
+ _subject.notifySubgraphBegin(profiling_subg_index);
+ for (auto &&code : _code)
+ {
+ const auto backend = code.lower_info->backend();
+// TODO : Move ruy profiler into ExecutionObserver
+#ifdef RUY_PROFILER
+ ruy::profiler::ScopeLabel label(code.op->name());
#endif
+ _subject.notifyJobBegin(this, profiling_subg_index, code.op_ind, backend);
-void LinearExecutor::executeImpl()
-{
- _subject.notifyModelBegin(this);
- for (auto &&code : _code)
+ auto &fn_seq = code.fn_seq;
+
+ fn_seq->initRunning();
+
+ bool handle_dynamic_tensor =
+ _lowered_graph->getHasDynamicTensor(code.op_ind) || hasDynamicInput();
+ fn_seq->enableDynamicShapeInferer(handle_dynamic_tensor);
+ fn_seq->run();
+
+ _subject.notifyJobEnd(this, profiling_subg_index, code.op_ind, backend);
+ }
+ _subject.notifySubgraphEnd(profiling_subg_index);
+ }
+ else
{
- const auto op_seq = code.op_seq;
- const auto backend = code.lower_info->backend();
+ for (auto &&code : _code)
+ {
// TODO : Move ruy profiler into ExecutionObserver
#ifdef RUY_PROFILER
- ruy::profiler::ScopeLabel label(seq_to_label(op_seq, _graph.operations()));
+ ruy::profiler::ScopeLabel label(code.op->name());
#endif
- _subject.notifyJobBegin(this, op_seq, backend);
- auto &fn_seq = code.fn_seq;
- bool handle_dynamic_tensor = op_seq->has_dynamic_tensor() || hasDynamicInput();
+ auto &fn_seq = code.fn_seq;
- fn_seq->enableDynamicShapeInferer(handle_dynamic_tensor);
- fn_seq->run();
+ fn_seq->initRunning();
- _subject.notifyJobEnd(this, op_seq, backend);
+ bool handle_dynamic_tensor =
+ _lowered_graph->getHasDynamicTensor(code.op_ind) || hasDynamicInput();
+ fn_seq->enableDynamicShapeInferer(handle_dynamic_tensor);
+ fn_seq->run();
+ }
}
- _subject.notifyModelEnd(this);
}
} // namespace exec
diff --git a/runtime/onert/core/src/exec/LinearExecutor.h b/runtime/onert/core/src/exec/LinearExecutor.h
index c224d3f4f..cc073411a 100644
--- a/runtime/onert/core/src/exec/LinearExecutor.h
+++ b/runtime/onert/core/src/exec/LinearExecutor.h
@@ -22,11 +22,11 @@
#ifndef __ONERT_EXEC_EXECUTOR_H_
#define __ONERT_EXEC_EXECUTOR_H_
-#include "ir/Index.h"
#include "ExecutorBase.h"
-#include "compiler/Linear.h"
-#include "exec/FunctionSequence.h"
+
#include "compiler/CodeMap.h"
+#include "ir/Index.h"
+#include "util/TracingCtx.h"
namespace onert
{
@@ -44,18 +44,15 @@ public:
* @brief Construct a new LinearExecutor object
* @param lowered_graph LoweredGraph object
* @param tensor_builders Tensor builders that are currently used
- * @param code_map OpSequence and its code map
+ * @param code_map @c ir::Operation and its code map
*/
LinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
- const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorRegistries &tensor_regs,
- backend::TensorManagerSet &&tensor_mgrs, compiler::CodeMap &&code_map,
- const std::vector<ir::OpSequenceIndex> &order)
- : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
- std::move(tensor_mgrs)}
+ backend::BackendContexts &&backend_contexts,
+ const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map,
+ const std::vector<ir::OperationIndex> &order, const util::TracingCtx *tracing_ctx)
+ : ExecutorBase{std::move(lowered_graph), std::move(backend_contexts), tensor_regs, tracing_ctx}
{
- for (auto index : order)
+ for (auto &&index : order)
{
_code.emplace_back(std::move(code_map.at(index)));
}
diff --git a/runtime/onert/core/src/exec/MinMaxRecorder.cc b/runtime/onert/core/src/exec/MinMaxRecorder.cc
new file mode 100644
index 000000000..88fc104d1
--- /dev/null
+++ b/runtime/onert/core/src/exec/MinMaxRecorder.cc
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MinMaxRecorder.h"
+
+#include "backend/ITensor.h"
+
+#include <cassert>
+#include <cmath>
+
+namespace onert
+{
+namespace exec
+{
+
+MinMaxRecorder::MinMaxRecorder(const std::string &minmax_filepath, const ir::Graph &graph,
+ const backend::BackendContexts &backend_contexts)
+ : _graph{graph}, _backend_contexts{backend_contexts}, _h5dumper(minmax_filepath)
+{
+}
+
+void MinMaxRecorder::handleJobEnd(IExecutor *, ir::SubgraphIndex subg_idx,
+ ir::OperationIndex op_idx, const backend::Backend *backend)
+{
+ const auto &tensor_reg = _backend_contexts.at(backend)->tensor_registry;
+ const auto &op = _graph.operations().at(op_idx);
+ const auto &outputs = op.getOutputs();
+ // TODO: Support multiple outputs
+ if (outputs.size() != 1)
+ throw std::runtime_error("Only single-output operators are supported for minmax recording.");
+
+ auto tensor = tensor_reg->getITensor(outputs.at(0));
+
+ // Logic copied from MinMaxObserver.cpp.
+
+ // Filter Ops
+ if (tensor->is_constant())
+ return;
+
+ if (tensor->data_type() != ir::DataType::FLOAT32)
+ return;
+
+ switch (op.opcode())
+ {
+ // Operators with multiple outputs
+ case ir::OpCode::If:
+ case ir::OpCode::Split:
+ case ir::OpCode::SplitV:
+ case ir::OpCode::TopKV2:
+ case ir::OpCode::Unpack:
+ case ir::OpCode::While:
+ return;
+ // NOTE: Sin, Cos, Tanh's output is in [-1, 1]
+ // We may not need to dump those operators.
+ default:; // Do Nothing
+ }
+
+ // Otherwise, dump!
+ assert(tensor->data_type() == ir::DataType::FLOAT32);
+ const auto data = reinterpret_cast<float *>(tensor->buffer());
+ const auto num_elements = tensor->total_size() / sizeof(float);
+
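+ // Track min/max over all elements, skipping NaN and lowest() values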
+ float max = std::numeric_limits<float>::lowest();
+ float min = std::numeric_limits<float>::max();
+
+ bool all_nan = true;
+ for (size_t i = 0; i < num_elements; ++i)
+ {
+ const float number = data[i];
+ if (std::isnan(number))
+ continue;
+
+ if (number == std::numeric_limits<float>::lowest())
+ continue;
+
+ all_nan = false;
+
+ if (number > max)
+ max = number;
+
+ if (number < min)
+ min = number;
+ }
+
+ if (all_nan)
+ throw std::runtime_error("All values are NaN(Not a Number)");
+
+ _minmax_map.append({subg_idx, op_idx}, min, max);
+}
+
+void MinMaxRecorder::handleSubgraphEnd(ir::SubgraphIndex)
+{
+ // It would be better to dump at the end of model execution rather than per subgraph,
+ // but that would require more extensive changes.
+ _h5dumper.dump(_minmax_map);
+}
+
+} // namespace exec
+} // namespace onert
diff --git a/runtime/onert/core/src/exec/MinMaxRecorder.h b/runtime/onert/core/src/exec/MinMaxRecorder.h
new file mode 100644
index 000000000..7a0817f5f
--- /dev/null
+++ b/runtime/onert/core/src/exec/MinMaxRecorder.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_MINMAX_RECORDER__
+#define __ONERT_EXEC_MINMAX_RECORDER__
+
+#include "ExecutionObservers.h"
+#include "ir/Index.h"
+#include "exec/MinMaxMap.h"
+#include "../dumper/h5/MinMaxDumper.h"
+
+#include <memory>
+
+namespace onert
+{
+namespace exec
+{
+
+class MinMaxRecorder : public IExecutionObserver
+{
+public:
+ MinMaxRecorder(const std::string &minmax_filepath, const ir::Graph &graph,
+ const backend::BackendContexts &backend_contexts);
+ void handleJobBegin(IExecutor *, ir::SubgraphIndex, ir::OperationIndex,
+ const backend::Backend *) override
+ {
+ return;
+ }
+ void handleJobEnd(IExecutor *, ir::SubgraphIndex, ir::OperationIndex,
+ const backend::Backend *) override;
+ void handleSubgraphEnd(ir::SubgraphIndex) override;
+
+private:
+ const ir::Graph &_graph;
+ const backend::BackendContexts &_backend_contexts;
+ dumper::h5::MinMaxDumper _h5dumper;
+ SMMinMaxMap _minmax_map;
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_MINMAX_RECORDER__
diff --git a/runtime/onert/core/src/exec/ParallelExecutor.cc b/runtime/onert/core/src/exec/ParallelExecutor.cc
index ab234aacd..9da7c82b4 100644
--- a/runtime/onert/core/src/exec/ParallelExecutor.cc
+++ b/runtime/onert/core/src/exec/ParallelExecutor.cc
@@ -31,7 +31,7 @@ class HookFunction : public IFunction
public:
HookFunction(IFunction *fn, const std::function<void()> &setup,
const std::function<void()> &teardown)
- : _fn{fn}, _setup{setup}, _teardown{teardown}
+ : _fn{fn}, _setup{setup}, _teardown{teardown}
{
}
@@ -59,14 +59,13 @@ void ParallelExecutor::notify(uint32_t finished_job_id)
_cv_jobs.notify_all();
}
-ParallelExecutor::ParallelExecutor(
- std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
- const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorRegistries &tensor_regs, backend::TensorManagerSet &&tensor_mgrs,
- compiler::CodeMap &&code_map)
- : DataflowExecutor{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
- std::move(tensor_mgrs), std::move(code_map)}
+ParallelExecutor::ParallelExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
+ backend::BackendContexts &&backend_contexts,
+ const compiler::TensorRegistries &tensor_regs,
+ compiler::CodeMap &&code_map,
+ const util::TracingCtx *tracing_ctx)
+ : DataflowExecutor{std::move(lowered_graph), std::move(backend_contexts), tensor_regs,
+ std::move(code_map), tracing_ctx}
{
VERBOSE(ParallelExecutor) << "Constructing Parallel Executor" << std::endl;
}
@@ -76,12 +75,12 @@ void ParallelExecutor::executeImpl()
bool dynamic_input_exists = hasDynamicInput();
// Init scheduler
- // TODO Consider to have distinct backend set in LowerInfoMap
+ // TODO Consider to have distinct backend set in GraphLowerInfo
BackendSet backends;
- for (auto &itr : _lowered_graph->getLowerInfo()->op_seq)
- {
- backends.add(itr.second->backend());
- }
+ _lowered_graph->lower_info().operation.iterate(
+ [&](const ir::OperationIndex &, const compiler::OperationLowerInfo &lower_info) {
+ backends.add(lower_info.backend());
+ });
_scheduler = std::make_unique<ParallelScheduler>(backends);
assert(noWaitingJobs());
@@ -101,7 +100,10 @@ void ParallelExecutor::executeImpl()
VERBOSE(ParallelExecutor) << "INITIAL JOBS : " << _ready_jobs.size() << std::endl;
- _subject.notifyModelBegin(this);
+ auto profiling_subg_index = _tracing_ctx->getSubgraphIndex(&_graph);
+
+ _subject.notifySubgraphBegin(profiling_subg_index);
+
while (true)
{
std::unique_lock<std::mutex> lock{_mu_jobs};
@@ -121,20 +123,24 @@ void ParallelExecutor::executeImpl()
lock.unlock();
- VERBOSE(ParallelExecutor) << "Assigning fn #" << job->index() << std::endl;
+ VERBOSE(ParallelExecutor) << "Assigning fn " << job->index() << std::endl;
auto job_index = job->index();
- auto op_sequence_index = _job_to_op_seq[job_index];
- auto op_seq = &_lowered_graph->op_seqs().at(op_sequence_index);
- auto backend = _lowered_graph->getLowerInfo()->op_seq.at(op_sequence_index)->backend();
- auto setup = [&, op_seq, backend]() { _subject.notifyJobBegin(this, op_seq, backend); };
- auto teardown = [&, job_index, op_seq, backend]() {
- _subject.notifyJobEnd(this, op_seq, backend);
+ auto op_ind = _job_to_op[job_index];
+ auto backend = _lowered_graph->lower_info().operation.at(op_ind).backend();
+ auto setup = [&, op_ind, backend]() {
+ _subject.notifyJobBegin(this, profiling_subg_index, op_ind, backend);
+ };
+ auto teardown = [&, job_index, op_ind, backend]() {
+ _subject.notifyJobEnd(this, profiling_subg_index, op_ind, backend);
notify(job_index);
};
+ job->fn_seq()->initRunning();
+
// dynamic tensor setting
- bool handle_dynamic_tensor = op_seq->has_dynamic_tensor() || dynamic_input_exists;
+ bool handle_dynamic_tensor =
+ _lowered_graph->getHasDynamicTensor(op_ind) || dynamic_input_exists;
job->fn_seq()->enableDynamicShapeInferer(handle_dynamic_tensor);
_scheduler->assign(std::make_unique<HookFunction>(job->fn_seq(), setup, teardown), backend);
@@ -145,7 +151,7 @@ void ParallelExecutor::executeImpl()
// Wait for all the jobs done
_scheduler->finish();
- _subject.notifyModelEnd(this);
+ _subject.notifySubgraphEnd(profiling_subg_index);
// Reset input info for the next execution
_input_info = _initial_input_info;
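The HookFunction used here wraps each scheduled function sequence so the observer callbacks run around the actual kernel. A minimal sketch of that wrapper pattern, written against std::function rather than the onert IFunction interface:

#include <functional>
#include <utility>

// Illustrative wrapper: run setup, the wrapped callable, then teardown.
class HookedFn
{
public:
  HookedFn(std::function<void()> fn, std::function<void()> setup, std::function<void()> teardown)
    : _fn{std::move(fn)}, _setup{std::move(setup)}, _teardown{std::move(teardown)}
  {
  }
  void run()
  {
    _setup();    // e.g. notifyJobBegin(...)
    _fn();       // the actual kernel / function sequence
    _teardown(); // e.g. notifyJobEnd(...) and the job-completion notification
  }

private:
  std::function<void()> _fn, _setup, _teardown;
};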
diff --git a/runtime/onert/core/src/exec/ParallelExecutor.h b/runtime/onert/core/src/exec/ParallelExecutor.h
index 929edfce9..7d459b0b4 100644
--- a/runtime/onert/core/src/exec/ParallelExecutor.h
+++ b/runtime/onert/core/src/exec/ParallelExecutor.h
@@ -17,17 +17,12 @@
#ifndef __ONERT_EXEC_PARALLEL_EXECUTOR_H__
#define __ONERT_EXEC_PARALLEL_EXECUTOR_H__
-#include <list>
-#include <queue>
-#include <unordered_map>
+#include "DataflowExecutor.h"
+#include "ParallelScheduler.h"
+
+#include "util/TracingCtx.h"
-#include "exec/FunctionSequence.h"
-#include "Job.h"
-#include "ir/OperandIndexSequence.h"
-#include "ir/Index.h"
#include <memory>
-#include "exec/DataflowExecutor.h"
-#include "ParallelScheduler.h"
namespace onert
{
@@ -48,13 +43,12 @@ public:
*
* @param lowered_graph LoweredGraph object
* @param tensor_builders Tensor builders that are currently used
- * @param code_map OpSequence and its code map
+ * @param code_map @c ir::Operation and its code map
*/
ParallelExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
- const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
- const compiler::TensorRegistries &tensor_regs,
- backend::TensorManagerSet &&tensor_mgrs, compiler::CodeMap &&code_map);
+ backend::BackendContexts &&backend_contexts,
+ const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map,
+ const util::TracingCtx *tracing_ctx);
void executeImpl() override;
diff --git a/runtime/onert/core/src/exec/ParallelScheduler.cc b/runtime/onert/core/src/exec/ParallelScheduler.cc
index 70c9c3dd6..538945631 100644
--- a/runtime/onert/core/src/exec/ParallelScheduler.cc
+++ b/runtime/onert/core/src/exec/ParallelScheduler.cc
@@ -30,7 +30,7 @@ ParallelScheduler::ParallelScheduler(const BackendSet &backends)
{
assert(!backends.empty());
- for (auto backend : backends)
+ for (auto &&backend : backends)
{
_thread_pools[backend] = std::make_unique<ThreadPool>();
}
@@ -45,7 +45,7 @@ void ParallelScheduler::assign(std::unique_ptr<IFunction> &&fn, const backend::B
void ParallelScheduler::finish()
{
- for (auto &itr : _thread_pools)
+ for (auto &&itr : _thread_pools)
{
itr.second->finish();
}
diff --git a/runtime/onert/core/src/exec/SingleModelExecutors.cc b/runtime/onert/core/src/exec/SingleModelExecutors.cc
new file mode 100644
index 000000000..4b954bab2
--- /dev/null
+++ b/runtime/onert/core/src/exec/SingleModelExecutors.cc
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SingleModelExecutors.h"
+
+#include "../backend/builtin/IOTensor.h"
+
+namespace onert
+{
+namespace exec
+{
+
+void SingleModelExecutors::emplace(const ir::ModelIndex &, const ir::SubgraphIndex &subg_index,
+ std::unique_ptr<IExecutor> exec)
+{
+ _executors.emplace(subg_index, std::move(exec));
+}
+
+IExecutor *SingleModelExecutors::at(const ir::ModelIndex &,
+ const ir::SubgraphIndex &subg_index) const
+{
+ return _executors.at(subg_index).get();
+}
+
+uint32_t SingleModelExecutors::inputSize() const
+{
+ return entryExecutor()->getInputTensors().size();
+}
+
+uint32_t SingleModelExecutors::outputSize() const
+{
+ return entryExecutor()->getOutputTensors().size();
+}
+
+const ir::OperandInfo &SingleModelExecutors::inputInfo(const ir::IOIndex &index) const
+{
+ return entryExecutor()->getInputTensors().at(index.value())->orig_info();
+}
+
+const ir::OperandInfo &SingleModelExecutors::outputInfo(const ir::IOIndex &index) const
+{
+ return entryExecutor()->getOutputTensors().at(index.value())->orig_info();
+}
+
+void SingleModelExecutors::execute(const IODescription &desc) { entryExecutor()->execute(desc); }
+
+} // namespace exec
+} // namespace onert
diff --git a/runtime/onert/core/src/exec/SingleModelExecutors.h b/runtime/onert/core/src/exec/SingleModelExecutors.h
new file mode 100644
index 000000000..98d629eae
--- /dev/null
+++ b/runtime/onert/core/src/exec/SingleModelExecutors.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_SINGLE_MODEL_EXECUTORS_H__
+#define __ONERT_EXEC_SINGLE_MODEL_EXECUTORS_H__
+
+#include "exec/IExecutors.h"
+#include "ir/NNPkg.h"
+
+namespace onert
+{
+namespace exec
+{
+
+/**
+ * @brief Class to gather executor set for single model NN package
+ */
+class SingleModelExecutors : public IExecutors
+{
+public:
+ /**
+ * @brief Construct a new SingleModelExecutors object
+ */
+ SingleModelExecutors(void) = default;
+ SingleModelExecutors(const SingleModelExecutors &) = delete;
+ SingleModelExecutors(SingleModelExecutors &&) = default;
+
+ /**
+ * @brief Destroy the SingleModelExecutors object
+ */
+ ~SingleModelExecutors() = default;
+
+public:
+ void emplace(const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+ std::unique_ptr<IExecutor> exec) override;
+
+ IExecutor *at(const ir::ModelIndex &model_index,
+ const ir::SubgraphIndex &subg_index) const override;
+
+ uint32_t inputSize() const override;
+
+ uint32_t outputSize() const override;
+
+ const ir::OperandInfo &inputInfo(const ir::IOIndex &index) const override;
+
+ const ir::OperandInfo &outputInfo(const ir::IOIndex &index) const override;
+
+ void execute(const IODescription &desc) override;
+
+private:
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<IExecutor>> _executors;
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_SINGLE_MODEL_EXECUTORS_H__
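SingleModelExecutors keys executors by subgraph index only, which is why emplace() and at() ignore their model-index argument. A stripped-down sketch of that container shape, using illustrative stand-in types rather than the real onert ones:

#include <cstdint>
#include <memory>
#include <unordered_map>
#include <utility>

// Illustrative stand-ins; not the real onert definitions.
struct Executor { void execute() {} };
using SubgraphIndex = uint32_t;

class Executors
{
public:
  // The model index is implicit for a single-model package; only subgraphs are keyed.
  void emplace(SubgraphIndex subg, std::unique_ptr<Executor> exec)
  {
    _executors.emplace(subg, std::move(exec));
  }
  Executor *entry() const { return _executors.at(SubgraphIndex{0}).get(); } // entry subgraph

private:
  std::unordered_map<SubgraphIndex, std::unique_ptr<Executor>> _executors;
};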
diff --git a/runtime/onert/core/src/exec/Sink.h b/runtime/onert/core/src/exec/Sink.h
deleted file mode 100644
index 6a99efe60..000000000
--- a/runtime/onert/core/src/exec/Sink.h
+++ /dev/null
@@ -1,199 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_EXEC_SINK_H__
-#define __ONERT_EXEC_SINK_H__
-
-#include "feature/nchw/Reader.h"
-#include "feature/nchw/View.h"
-#include "feature/nhwc/Reader.h"
-#include "feature/nhwc/View.h"
-
-#include <cassert>
-#include <memory>
-#include "util/Utils.h"
-#include <misc/feature/IndexIterator.h>
-
-namespace onert
-{
-namespace exec
-{
-struct ISink
-{
- virtual ~ISink() = default;
-
- virtual void pull(::onert::backend::ITensor &tensor) const = 0;
-};
-
-// Create second lever inheritance: the first lever is used as a reference type in use-case places
-template <typename T> class ITemplSink : public ISink
-{
-public:
- ITemplSink(void *output_buffer, const size_t &output_size, const ir::Shape &shape,
- const bool copy, ir::Layout io_layout)
- : _output_buffer{reinterpret_cast<T *>(output_buffer)}, _output_size{output_size},
- _shape{shape}, _copy{copy}, _io_layout{io_layout}
- {
- }
-
-protected:
- void pullUnif(onert::backend::ITensor &tensor) const
- {
- assert(((_io_layout == ir::Layout::NHWC && tensor.layout() == ir::Layout::NCHW) ||
- (_io_layout == ir::Layout::NCHW && tensor.layout() == ir::Layout::NHWC)) ||
- _copy);
- auto input_buffer = tensor.buffer();
- auto rank = _shape.rank();
-
- if (!tensor.has_padding() && rank < 4 + _copy)
- {
- memcpy(_output_buffer, input_buffer, _output_size);
- return;
- }
-
- switch (rank)
- {
- case 0:
- case 1:
- {
- memcpy(_output_buffer, input_buffer, _output_size);
- break;
- }
- case 2:
- {
- const int32_t copy_len = _shape.dim(1);
-
- for (auto i = 0; i < _shape.dim(0); ++i)
- {
- ir::Coordinates coords{i, 0};
- memcpy(_output_buffer + i * copy_len, input_buffer + tensor.calcOffset(coords),
- copy_len * sizeof(T));
- }
- break;
- }
- case 3:
- {
- const int32_t dim1 = _shape.dim(1);
- const int32_t dim2 = _shape.dim(2);
-
- for (auto i = 0; i < _shape.dim(0); ++i)
- {
- for (auto j = 0; j < _shape.dim(1); ++j)
- {
- ir::Coordinates coords{i, j, 0};
- memcpy(_output_buffer + i * dim1 * dim2 + j * dim2,
- input_buffer + tensor.calcOffset(coords), dim2 * sizeof(T));
- }
- }
- break;
- }
- case 4:
- {
- if (_copy)
- {
- const int32_t dim1 = _shape.dim(1);
- const int32_t dim2 = _shape.dim(2);
- const int32_t dim3 = _shape.dim(3);
-
- for (auto i = 0; i < _shape.dim(0); ++i)
- {
- for (auto j = 0; j < _shape.dim(1); ++j)
- {
- for (auto k = 0; k < _shape.dim(2); ++k)
- {
- ir::Coordinates coords{i, j, k, 0};
- memcpy(_output_buffer + i * dim1 * dim2 * dim3 + j * dim2 * dim3 + k * dim3,
- input_buffer + tensor.calcOffset(coords), dim3 * sizeof(T));
- }
- }
- }
- }
- else
- {
- const auto shape = _shape.asFeature(_io_layout);
-
- if (_io_layout == ir::Layout::NHWC)
- {
- const exec::feature::nchw::Reader<T> from(&tensor);
- exec::feature::nhwc::View<T> into(shape, _output_buffer, _output_size);
- feature::iterate(shape)
- << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
- const auto value = from.at(batch, ch, row, col);
- into.at(batch, row, col, ch) = value;
- };
- }
- else if (_io_layout == ir::Layout::NCHW)
- {
- const exec::feature::nhwc::Reader<T> from(&tensor);
- exec::feature::nchw::View<T> into(shape, _output_buffer, _output_size);
- feature::iterate(shape)
- << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
- const auto value = from.at(batch, row, col, ch);
- into.at(batch, ch, row, col) = value;
- };
- }
- else
- {
- throw std::runtime_error("Wrong Layout");
- }
- }
- break;
- }
- default:
- throw std::runtime_error("NYI: rank > 4");
- break;
- }
- }
-
-private:
- T *_output_buffer;
- const size_t _output_size;
- const ir::Shape _shape;
- const bool _copy;
- const ir::Layout _io_layout;
-};
-
-template <typename T> class PermutateSink final : public ITemplSink<T>
-{
-public:
- PermutateSink(void *output_buffer, const size_t &output_size, const ir::Shape &shape,
- ir::Layout io_layout)
- : ITemplSink<T>(output_buffer, output_size, shape, false, io_layout)
- {
- }
-
-public:
- void pull(onert::backend::ITensor &tensor) const override { ITemplSink<T>::pullUnif(tensor); }
-};
-
-// Only supports NHWC format front-end(NNAPI) now
-template <typename T> class CopySink final : public ITemplSink<T>
-{
-public:
- CopySink(void *output_buffer, const size_t &output_size, const ir::Shape &shape,
- ir::Layout io_layout = ir::Layout::UNKNOWN)
- : ITemplSink<T>(output_buffer, output_size, shape, true, io_layout)
- {
- }
-
-public:
- void pull(onert::backend::ITensor &tensor) const override { ITemplSink<T>::pullUnif(tensor); }
-};
-
-} // namespace exec
-} // namespace onert
-
-#endif // __ONERT_EXEC_SINK_H__
diff --git a/runtime/onert/core/src/exec/Source.h b/runtime/onert/core/src/exec/Source.h
deleted file mode 100644
index fb2be4dd8..000000000
--- a/runtime/onert/core/src/exec/Source.h
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_EXEC_SOURCE_H__
-#define __ONERT_EXEC_SOURCE_H__
-
-#include "feature/IndexIterator.h"
-#include "feature/nchw/Reader.h"
-#include "feature/nchw/View.h"
-#include "feature/nhwc/Reader.h"
-#include "feature/nhwc/View.h"
-
-#include <cassert>
-#include <memory>
-#include "util/Utils.h"
-#include <ir/Layout.h>
-#include "ir/Shape.h"
-
-namespace onert
-{
-namespace exec
-{
-
-struct ISource
-{
- virtual ~ISource() = default;
-
- virtual void push(::onert::backend::ITensor &tensor) const = 0;
-};
-
-// Create second lever inheritance: the first lever is used as a reference type in use-case places
-template <typename T> class ITemplSource : public ISource
-{
-public:
- ITemplSource(const void *input_buffer, const size_t &input_size, const ir::Shape &shape,
- const bool copy, ir::Layout io_layout)
- : _input_buffer{reinterpret_cast<const T *>(input_buffer)}, _input_size{input_size},
- _shape{shape}, _copy(copy), _io_layout{io_layout}
- {
- }
-
- virtual void push(::onert::backend::ITensor &tensor) const = 0;
-
-protected:
- void pushUnif(onert::backend::ITensor &tensor) const
- {
- assert(((_io_layout == ir::Layout::NHWC && tensor.layout() == ir::Layout::NCHW) ||
- (_io_layout == ir::Layout::NCHW && tensor.layout() == ir::Layout::NHWC)) ||
- _copy);
- auto output_buffer = tensor.buffer();
- auto rank = _shape.rank();
-
- if (!tensor.has_padding() && rank < 4 + _copy)
- {
- memcpy(output_buffer, _input_buffer, _input_size);
- return;
- }
-
- switch (rank)
- {
- case 0:
- case 1:
- {
- memcpy(output_buffer, _input_buffer, _input_size);
- break;
- }
- case 2:
- {
- const int32_t copy_len = _shape.dim(1);
-
- for (auto i = 0; i < _shape.dim(0); ++i)
- {
- ir::Coordinates coords{i, 0};
- memcpy(output_buffer + tensor.calcOffset(coords), _input_buffer + i * copy_len,
- copy_len * sizeof(T));
- }
- break;
- }
- case 3:
- {
- const int32_t dim1 = _shape.dim(1);
- const int32_t dim2 = _shape.dim(2);
-
- for (auto i = 0; i < _shape.dim(0); ++i)
- {
- for (auto j = 0; j < _shape.dim(1); ++j)
- {
- ir::Coordinates coords{i, j, 0};
- memcpy(output_buffer + tensor.calcOffset(coords),
- _input_buffer + i * dim1 * dim2 + j * dim2, dim2 * sizeof(T));
- }
- }
- break;
- }
- case 4:
- {
- if (_copy)
- {
- const int32_t dim1 = _shape.dim(1);
- const int32_t dim2 = _shape.dim(2);
- const int32_t dim3 = _shape.dim(3);
- for (auto i = 0; i < _shape.dim(0); ++i)
- {
- for (auto j = 0; j < _shape.dim(1); ++j)
- {
- for (auto k = 0; k < _shape.dim(2); ++k)
- {
- ir::Coordinates coords{i, j, k, 0};
- memcpy(output_buffer + tensor.calcOffset(coords),
- _input_buffer + i * dim1 * dim2 * dim3 + j * dim2 * dim3 + k * dim3,
- dim3 * sizeof(T));
- }
- }
- }
- }
- else
- {
- const auto shape = _shape.asFeature(_io_layout);
-
- if (_io_layout == ir::Layout::NCHW)
- {
- const exec::feature::nchw::Reader<T> from(shape, _input_buffer, _input_size);
- exec::feature::nhwc::View<T> into(&tensor);
- feature::iterate(shape)
- << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
- const auto value = from.at(batch, ch, row, col);
- into.at(batch, row, col, ch) = value;
- };
- }
- else if (_io_layout == ir::Layout::NHWC)
- {
- const exec::feature::nhwc::Reader<T> from(shape, _input_buffer, _input_size);
- exec::feature::nchw::View<T> into(&tensor);
- feature::iterate(shape)
- << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
- const auto value = from.at(batch, row, col, ch);
- into.at(batch, ch, row, col) = value;
- };
- }
- else
- {
- throw std::runtime_error("Wrong Layout");
- }
- }
-
- break;
- }
- default:
- throw std::runtime_error("NYI: rank > 4");
- break;
- }
- }
-
-private:
- const T *_input_buffer;
- const size_t _input_size;
- const ir::Shape _shape;
- const bool _copy;
- const ir::Layout _io_layout;
-};
-
-template <typename T> class PermutateSource final : public ITemplSource<T>
-{
-public:
- PermutateSource(const void *input_buffer, const size_t &input_size, const ir::Shape &shape,
- ir::Layout io_layout)
- : ITemplSource<T>(input_buffer, input_size, shape, false, io_layout)
- {
- }
-
-public:
- void push(onert::backend::ITensor &tensor) const override
- {
- // do NHWC_TO_NCHW or NCHW_TO_NHWC permutation
- ITemplSource<T>::pushUnif(tensor);
- }
-};
-
-template <typename T> class CopySource final : public ITemplSource<T>
-{
-public:
- CopySource(const void *input_buffer, const size_t &input_size, const ir::Shape &shape,
- ir::Layout io_layout = ir::Layout::UNKNOWN)
- : ITemplSource<T>(input_buffer, input_size, shape, true, io_layout)
- {
- }
-
-public:
- void push(onert::backend::ITensor &tensor) const override { ITemplSource<T>::pushUnif(tensor); }
-};
-
-} // namespace exec
-} // namespace onert
-
-#endif // __ONERT_EXEC_SOURCE_H__
diff --git a/runtime/onert/core/src/exec/ThreadPool.cc b/runtime/onert/core/src/exec/ThreadPool.cc
index c8e0e3265..bf85e59f6 100644
--- a/runtime/onert/core/src/exec/ThreadPool.cc
+++ b/runtime/onert/core/src/exec/ThreadPool.cc
@@ -48,7 +48,7 @@ uint32_t ThreadPool::numJobsInQueue() { return _worker.numJobsInQueue(); }
void ThreadPool::join()
{
- for (auto &thread : _threads)
+ for (auto &&thread : _threads)
{
thread.join();
}
diff --git a/runtime/onert/core/src/exec/feature/MockTensor.h b/runtime/onert/core/src/exec/feature/MockTensor.h
new file mode 100644
index 000000000..1d2d375e2
--- /dev/null
+++ b/runtime/onert/core/src/exec/feature/MockTensor.h
@@ -0,0 +1,66 @@
+
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/ITensor.h"
+
+template <typename T> class MockTensor : public onert::backend::ITensor
+{
+public:
+ MockTensor<T>(onert::ir::Shape &shape, T *buf, onert::ir::Layout layout)
+ : _buf(reinterpret_cast<uint8_t *>(buf)), _shape(shape), _layout(layout)
+ {
+ }
+
+public:
+ uint8_t *buffer() const override { return _buf; }
+
+ size_t calcOffset(const onert::ir::Coordinates &coords) const override
+ {
+ size_t rank = _shape.rank();
+ rank = rank == 0 ? 1 : rank;
+ size_t offset = 0;
+ for (size_t i = 0; i < rank; ++i)
+ {
+ auto dim = _shape.rank() == 0 ? 1 : _shape.dim(i);
+ offset = offset * dim + coords[i];
+ }
+ offset *= sizeof(T);
+
+ return offset;
+ }
+
+ onert::ir::Shape getShape() const override { return _shape; }
+
+public: // DUMMY methods
+ size_t total_size() const override { return 0; }
+ onert::ir::Layout layout() const override { return _layout; }
+ onert::ir::DataType data_type() const override { return onert::ir::DataType::UINT8; }
+ float data_scale() const override { return 0; }
+ int32_t data_zero_point() const override { return 0; }
+ const std::vector<float> &data_scales() const override { return _dummy_scales; }
+ const std::vector<int32_t> &data_zero_points() const override { return _dummy_zerops; }
+ bool has_padding() const override { return false; }
+ void access(const std::function<void(ITensor &tensor)> &fn) override {}
+ bool is_dynamic() const override { return false; }
+
+private:
+ uint8_t *_buf = nullptr;
+ onert::ir::Shape _shape;
+ onert::ir::Layout _layout = onert::ir::Layout::UNKNOWN;
+ std::vector<float> _dummy_scales;
+ std::vector<int32_t> _dummy_zerops;
+};
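MockTensor::calcOffset above is a plain row-major byte-offset computation, which is what the Reader/View tests rely on. A worked instance of that formula, assuming a float tensor with the shape {1, 3, 2, 2} used by the NCHW test below:

// offset(c0, c1, c2, c3) = (((c0*d1 + c1)*d2 + c2)*d3 + c3) * sizeof(T)
// For shape {1, 3, 2, 2}, coords {0, 1, 0, 1} and T = float:
//   ((0*3 + 1)*2 + 0)*2 + 1 = 5 elements  ->  5 * sizeof(float) = 20 bytes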
diff --git a/runtime/onert/core/src/exec/feature/nchw/Reader.h b/runtime/onert/core/src/exec/feature/nchw/Reader.h
index 7be9df4d5..d5e3cb97c 100644
--- a/runtime/onert/core/src/exec/feature/nchw/Reader.h
+++ b/runtime/onert/core/src/exec/feature/nchw/Reader.h
@@ -36,35 +36,36 @@ namespace nchw
template <typename T> class Reader : public feature::Reader<T>
{
public:
- // Construct for buffer of model inputs
- Reader(const ir::FeatureShape &shape, const T *ptr, size_t len)
- : _shape{shape}, _ptr{reinterpret_cast<const uint8_t *>(ptr)}, _len{len}
+ using Strides = ir::FeatureShape;
+ // Construct for buffer and strides
+ Reader(const ir::FeatureShape &shape, const Strides &strides, const T *ptr, size_t len)
+ : _shape{shape}, _strides{strides}, _ptr{reinterpret_cast<const uint8_t *>(ptr)}, _len{len}
{
- assert(shape.N * shape.C * shape.H * shape.W * sizeof(T) == len);
-
- // No padding
- _strides.W = sizeof(T);
- _strides.H = shape.W * sizeof(T);
- _strides.C = shape.W * shape.H * sizeof(T);
- _strides.N = shape.W * shape.H * shape.C * sizeof(T);
+ UNUSED_RELEASE(len); // Workaround for unused variable in release mode
+ assert(len == static_cast<size_t>(strides.N != 0
+ ? shape.N * strides.N
+ : strides.C != 0 ? shape.C * strides.C
+ : strides.H != 0 ? shape.H * strides.H
+ : shape.W * strides.W));
}
// Construct for backend tensor
Reader(backend::ITensor *tensor)
- : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
+ : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
{
assert(tensor->layout() == ir::Layout::NCHW);
const auto start_offset = tensor->calcOffset({0, 0, 0, 0});
- _strides.W = tensor->dimension(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
- _strides.H = tensor->dimension(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
- _strides.C = tensor->dimension(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
- _strides.N = tensor->dimension(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;
-
- _shape.W = tensor->dimension(3);
- _shape.H = tensor->dimension(2);
- _shape.C = tensor->dimension(1);
- _shape.N = tensor->dimension(0);
+ auto shape = tensor->getShape();
+ _strides.W = shape.dim(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
+ _strides.H = shape.dim(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
+ _strides.C = shape.dim(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
+ _strides.N = shape.dim(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;
+
+ _shape.W = shape.dim(3);
+ _shape.H = shape.dim(2);
+ _shape.C = shape.dim(1);
+ _shape.N = shape.dim(0);
}
public:
@@ -104,7 +105,6 @@ private:
private:
// TODO Remove _shape
ir::FeatureShape _shape;
- using Strides = ir::FeatureShape;
Strides _strides;
const uint8_t *_ptr;
size_t _len;
diff --git a/runtime/onert/core/src/exec/feature/nchw/Reader.test.cc b/runtime/onert/core/src/exec/feature/nchw/Reader.test.cc
new file mode 100644
index 000000000..f439cafb5
--- /dev/null
+++ b/runtime/onert/core/src/exec/feature/nchw/Reader.test.cc
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Reader.h"
+
+#include "../MockTensor.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert::exec::feature;
+
+template <typename T> class Reader_nchw : public testing::Test
+{
+public:
+ void setData(std::initializer_list<T> list) { _data = std::make_shared<std::vector<T>>(list); }
+
+ void setShape(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ _shape = onert::ir::FeatureShape(batch, depth, height, width);
+ }
+
+ void setStride(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ auto elem_size = sizeof(T);
+ _stride = onert::ir::FeatureShape(batch * elem_size, depth * elem_size, height * elem_size,
+ width * elem_size);
+ }
+
+ void createReader()
+ {
+ _reader =
+ std::make_shared<nchw::Reader<T>>(_shape, _stride, _data->data(), _data->size() * sizeof(T));
+ }
+
+ void createUsingMockTensor()
+ {
+ onert::ir::Shape shape = {_shape.N, _shape.H, _shape.W, _shape.C};
+ _tensor = std::make_shared<MockTensor<T>>(shape, _data->data(), onert::ir::Layout::NCHW);
+ _reader = std::make_shared<nchw::Reader<T>>(_tensor.get());
+ }
+
+ std::shared_ptr<Reader<T>> _reader = nullptr;
+
+private:
+ std::shared_ptr<std::vector<T>> _data = nullptr;
+ onert::ir::FeatureShape _shape;
+ onert::ir::FeatureShape _stride;
+ std::shared_ptr<MockTensor<T>> _tensor = nullptr;
+};
+
+using ReaderTypes = ::testing::Types<float, int32_t, uint8_t, int8_t, int16_t>;
+TYPED_TEST_SUITE(Reader_nchw, ReaderTypes);
+
+TYPED_TEST(Reader_nchw, basic_reader)
+{
+ this->setData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11});
+ this->setShape(1, 2, 3, 2);
+ this->setStride(12, 6, 2, 1);
+ this->createReader();
+
+ // Data: NCHW
+ // Shape: NCHW
+ ASSERT_EQ(this->_reader->at(0, 1, 1, 0), 8);
+ ASSERT_EQ(this->_reader->at(1, 1, 0), 8);
+
+ // Data: NCHW
+ // Shape: NCHW
+ this->createUsingMockTensor();
+
+ ASSERT_EQ(this->_reader->at(0, 1, 1, 0), 6);
+ ASSERT_EQ(this->_reader->at(1, 1, 0), 6);
+}
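The stride arithmetic in this test can be checked by hand; setStride() takes element counts and scales them by sizeof(T) into byte strides. A worked example for the buffer-based reader above:

// Shape (N, C, H, W) = (1, 2, 3, 2), element strides (N, C, H, W) = (12, 6, 2, 1)
// at(batch=0, ch=1, row=1, col=0):
//   element index = 0*12 + 1*6 + 1*2 + 0*1 = 8  ->  data[8] == 8, as asserted above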
diff --git a/runtime/onert/core/src/exec/feature/nchw/View.h b/runtime/onert/core/src/exec/feature/nchw/View.h
index dbaf1a91e..cdbb0cd7c 100644
--- a/runtime/onert/core/src/exec/feature/nchw/View.h
+++ b/runtime/onert/core/src/exec/feature/nchw/View.h
@@ -37,8 +37,10 @@ namespace nchw
template <typename T> class View final : public Reader<T>
{
public:
+ using Strides = typename Reader<T>::Strides;
// Construct for buffer of model inputs
- View(const ir::FeatureShape &shape, T *ptr, size_t len) : Reader<T>{shape, ptr, len}
+ View(const ir::FeatureShape &shape, const Strides &strides, T *ptr, size_t len)
+ : Reader<T>{shape, strides, ptr, len}
{
// DO NOTHING
}
diff --git a/runtime/onert/core/src/exec/feature/nchw/View.test.cc b/runtime/onert/core/src/exec/feature/nchw/View.test.cc
new file mode 100644
index 000000000..c6dcda710
--- /dev/null
+++ b/runtime/onert/core/src/exec/feature/nchw/View.test.cc
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "View.h"
+
+#include "../MockTensor.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert::exec::feature;
+
+template <typename T> class View_nchw : public testing::Test
+{
+public:
+ void setData(std::initializer_list<T> list) { _data = std::make_shared<std::vector<T>>(list); }
+
+ void setShape(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ _shape = onert::ir::FeatureShape(batch, depth, height, width);
+ }
+
+ void setStride(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ auto elem_size = sizeof(T);
+ _stride = onert::ir::FeatureShape(batch * elem_size, depth * elem_size, height * elem_size,
+ width * elem_size);
+ }
+
+ void createView()
+ {
+ _view =
+ std::make_shared<nchw::View<T>>(_shape, _stride, _data->data(), _data->size() * sizeof(T));
+ }
+
+ void createUsingMockTensor()
+ {
+ onert::ir::Shape shape = {_shape.N, _shape.H, _shape.W, _shape.C};
+ _tensor = std::make_shared<MockTensor<T>>(shape, _data->data(), onert::ir::Layout::NCHW);
+ _view = std::make_shared<nchw::View<T>>(_tensor.get());
+ }
+
+ std::shared_ptr<nchw::View<T>> _view = nullptr;
+
+private:
+ std::shared_ptr<std::vector<T>> _data = nullptr;
+ onert::ir::FeatureShape _shape;
+ onert::ir::FeatureShape _stride;
+ std::shared_ptr<MockTensor<T>> _tensor = nullptr;
+};
+
+using ViewTypes = ::testing::Types<float, int32_t, uint8_t, int8_t, int16_t>;
+TYPED_TEST_SUITE(View_nchw, ViewTypes);
+
+TYPED_TEST(View_nchw, basic_view)
+{
+ this->setData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11});
+ this->setShape(1, 2, 3, 2);
+ this->setStride(12, 6, 2, 1);
+ this->createView();
+
+ // Data: NCHW
+ // Shape: NCHW
+ ASSERT_EQ(this->_view->at(0, 1, 1, 0), 8);
+ ASSERT_EQ(this->_view->at(1, 1, 0), 8);
+
+ // Data: NCHW
+ // Shape: NCHW
+ this->createUsingMockTensor();
+
+ ASSERT_EQ(this->_view->at(0, 1, 1, 0), 6);
+ ASSERT_EQ(this->_view->at(1, 1, 0), 6);
+}
diff --git a/runtime/onert/core/src/exec/feature/nhwc/Reader.h b/runtime/onert/core/src/exec/feature/nhwc/Reader.h
index 7730cee72..0bc1ee95b 100644
--- a/runtime/onert/core/src/exec/feature/nhwc/Reader.h
+++ b/runtime/onert/core/src/exec/feature/nhwc/Reader.h
@@ -37,36 +37,36 @@ namespace nhwc
template <typename T> class Reader : public feature::Reader<T>
{
public:
- // Construct for buffer of model inputs
- Reader(const ir::FeatureShape &shape, const T *ptr, size_t len)
- : _shape{shape}, _ptr{reinterpret_cast<const uint8_t *>(ptr)}, _len{len}
+ using Strides = ir::FeatureShape;
+ // Construct for buffer and strides
+ Reader(const ir::FeatureShape &shape, const Strides &strides, const T *ptr, size_t len)
+ : _shape{shape}, _strides{strides}, _ptr{reinterpret_cast<const uint8_t *>(ptr)}, _len{len}
{
UNUSED_RELEASE(len); // Workaround for unused variable in release mode
- assert(shape.N * shape.C * shape.H * shape.W * sizeof(T) == len);
-
- // No padding
- _strides.C = sizeof(T);
- _strides.W = shape.C * sizeof(T);
- _strides.H = shape.C * shape.W * sizeof(T);
- _strides.N = shape.C * shape.W * shape.H * sizeof(T);
+ assert(len == static_cast<size_t>(strides.N != 0
+ ? shape.N * strides.N
+ : strides.H != 0 ? shape.H * strides.H
+ : strides.W != 0 ? shape.W * strides.W
+ : shape.C * strides.C));
}
// Construct for backend tensor
Reader(const backend::ITensor *tensor)
- : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
+ : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
{
assert(tensor->layout() == ir::Layout::NHWC);
const auto start_offset = tensor->calcOffset({0, 0, 0, 0});
- _strides.C = tensor->dimension(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
- _strides.W = tensor->dimension(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
- _strides.H = tensor->dimension(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
- _strides.N = tensor->dimension(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;
-
- _shape.C = tensor->dimension(3);
- _shape.W = tensor->dimension(2);
- _shape.H = tensor->dimension(1);
- _shape.N = tensor->dimension(0);
+ auto shape = tensor->getShape();
+ _strides.C = shape.dim(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
+ _strides.W = shape.dim(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
+ _strides.H = shape.dim(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
+ _strides.N = shape.dim(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;
+
+ _shape.C = shape.dim(3);
+ _shape.W = shape.dim(2);
+ _shape.H = shape.dim(1);
+ _shape.N = shape.dim(0);
}
public:
@@ -106,7 +106,6 @@ private:
private:
// TODO Remove _shape
ir::FeatureShape _shape;
- using Strides = ir::FeatureShape;
Strides _strides;
const uint8_t *_ptr;
size_t _len;
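With explicit strides the reader no longer assumes a densely packed buffer: an element's address is the dot product of its coordinates and the byte strides. A minimal sketch of that lookup for the NHWC case, using a local stand-in for the stride struct:

#include <cstddef>
#include <cstdint>

struct Strides { int32_t N, H, W, C; }; // byte strides, illustrative stand-in

template <typename T>
const T &at_nhwc(const uint8_t *base, const Strides &s, int32_t batch, int32_t row, int32_t col,
                 int32_t ch)
{
  // byte offset is the dot product of coordinates and byte strides
  const std::size_t offset = batch * s.N + row * s.H + col * s.W + ch * s.C;
  return *reinterpret_cast<const T *>(base + offset);
}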
diff --git a/runtime/onert/core/src/exec/feature/nhwc/Reader.test.cc b/runtime/onert/core/src/exec/feature/nhwc/Reader.test.cc
new file mode 100644
index 000000000..773199042
--- /dev/null
+++ b/runtime/onert/core/src/exec/feature/nhwc/Reader.test.cc
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Reader.h"
+
+#include "../MockTensor.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert::exec::feature;
+
+template <typename T> class Reader_nhwc : public testing::Test
+{
+public:
+ void setData(std::initializer_list<T> list) { _data = std::make_shared<std::vector<T>>(list); }
+
+ void setShape(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ _shape = onert::ir::FeatureShape(batch, depth, height, width);
+ }
+
+ void setStride(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ auto elem_size = sizeof(T);
+ _stride = onert::ir::FeatureShape(batch * elem_size, depth * elem_size, height * elem_size,
+ width * elem_size);
+ }
+
+ void createReader()
+ {
+ _reader =
+ std::make_shared<nhwc::Reader<T>>(_shape, _stride, _data->data(), _data->size() * sizeof(T));
+ }
+
+ void createUsingMockTensor()
+ {
+ onert::ir::Shape shape = {_shape.N, _shape.H, _shape.W, _shape.C};
+ _tensor = std::make_shared<MockTensor<T>>(shape, _data->data(), onert::ir::Layout::NHWC);
+ _reader = std::make_shared<nhwc::Reader<T>>(_tensor.get());
+ }
+
+ std::shared_ptr<nhwc::Reader<T>> _reader = nullptr;
+
+private:
+ std::shared_ptr<std::vector<T>> _data = nullptr;
+ onert::ir::FeatureShape _shape;
+ onert::ir::FeatureShape _stride;
+ std::shared_ptr<MockTensor<T>> _tensor = nullptr;
+};
+
+using ReaderTypes = ::testing::Types<float, int32_t, uint8_t, int8_t, int16_t>;
+TYPED_TEST_SUITE(Reader_nhwc, ReaderTypes);
+TYPED_TEST_SUITE(MockTensorReader_nhwc, ReaderTypes);
+
+TYPED_TEST(Reader_nhwc, basic_reader)
+{
+ this->setData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11});
+ this->setShape(1, 2, 3, 2);
+ this->setStride(12, 1, 6, 2);
+ this->createReader();
+
+ // Data: NCHW
+ // Shape: NHWC
+ ASSERT_EQ(this->_reader->at(0, 1, 1, 0), 8);
+ ASSERT_EQ(this->_reader->at(1, 1, 0), 8);
+
+ // Data: NHWC
+ // Shape: NHWC
+ this->createUsingMockTensor();
+
+ ASSERT_EQ(this->_reader->at(0, 1, 1, 0), 6);
+ ASSERT_EQ(this->_reader->at(1, 1, 0), 6);
+}
diff --git a/runtime/onert/core/src/exec/feature/nhwc/View.h b/runtime/onert/core/src/exec/feature/nhwc/View.h
index 72c8c3415..c98d050c3 100644
--- a/runtime/onert/core/src/exec/feature/nhwc/View.h
+++ b/runtime/onert/core/src/exec/feature/nhwc/View.h
@@ -17,7 +17,7 @@
#ifndef __ONERT_EXEC_FEATURE_NHWC_VIEW_H__
#define __ONERT_EXEC_FEATURE_NHWC_VIEW_H__
-#include "../Reader.h"
+#include "Reader.h"
#include <cassert>
#include <cstddef>
@@ -38,8 +38,10 @@ namespace nhwc
template <typename T> class View final : public Reader<T>
{
public:
- // Construct for buffer of model inputs
- View(const ir::FeatureShape &shape, T *ptr, size_t len) : Reader<T>{shape, ptr, len}
+ using Strides = typename Reader<T>::Strides;
+ // Construct for buffer and strides
+ View(const ir::FeatureShape &shape, const Strides &strides, T *ptr, size_t len)
+ : Reader<T>{shape, strides, ptr, len}
{
// DO NOTHING
}
diff --git a/runtime/onert/core/src/exec/feature/nhwc/View.test.cc b/runtime/onert/core/src/exec/feature/nhwc/View.test.cc
new file mode 100644
index 000000000..bdd73d5a7
--- /dev/null
+++ b/runtime/onert/core/src/exec/feature/nhwc/View.test.cc
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "View.h"
+
+#include "../MockTensor.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert::exec::feature;
+
+template <typename T> class View_nhwc : public testing::Test
+{
+public:
+ void setData(std::initializer_list<T> list) { _data = std::make_shared<std::vector<T>>(list); }
+
+ void setShape(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ _shape = onert::ir::FeatureShape(batch, depth, height, width);
+ }
+
+ void setStride(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ auto elem_size = sizeof(T);
+ _stride = onert::ir::FeatureShape(batch * elem_size, depth * elem_size, height * elem_size,
+ width * elem_size);
+ }
+
+ void createView()
+ {
+ _view =
+ std::make_shared<nhwc::View<T>>(_shape, _stride, _data->data(), _data->size() * sizeof(T));
+ }
+
+ void createUsingMockTensor()
+ {
+ onert::ir::Shape shape = {_shape.N, _shape.H, _shape.W, _shape.C};
+ _tensor = std::make_shared<MockTensor<T>>(shape, _data->data(), onert::ir::Layout::NHWC);
+ _view = std::make_shared<nhwc::View<T>>(_tensor.get());
+ }
+
+ std::shared_ptr<nhwc::View<T>> _view = nullptr;
+
+private:
+ std::shared_ptr<std::vector<T>> _data = nullptr;
+ onert::ir::FeatureShape _shape;
+ onert::ir::FeatureShape _stride;
+ std::shared_ptr<MockTensor<T>> _tensor = nullptr;
+};
+
+using ViewTypes = ::testing::Types<float, int32_t, uint8_t, int8_t, int16_t>;
+TYPED_TEST_SUITE(View_nhwc, ViewTypes);
+TYPED_TEST_SUITE(MockTensorView_nhwc, ViewTypes);
+
+TYPED_TEST(View_nhwc, basic_view)
+{
+ this->setData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11});
+ this->setShape(1, 2, 3, 2);
+ this->setStride(12, 1, 6, 2);
+ this->createView();
+
+ // Data: NCHW
+ // Shape: NHWC
+ ASSERT_EQ(this->_view->at(0, 1, 1, 0), 8);
+ ASSERT_EQ(this->_view->at(1, 1, 0), 8);
+
+ // Data: NHWC
+ // Shape: NHWC
+ this->createUsingMockTensor();
+
+ ASSERT_EQ(this->_view->at(0, 1, 1, 0), 6);
+ ASSERT_EQ(this->_view->at(1, 1, 0), 6);
+}
diff --git a/runtime/onert/core/src/exec/train/TrainableExecutor.cc b/runtime/onert/core/src/exec/train/TrainableExecutor.cc
new file mode 100644
index 000000000..9c7e70c29
--- /dev/null
+++ b/runtime/onert/core/src/exec/train/TrainableExecutor.cc
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TrainableExecutor.h"
+#ifdef RUY_PROFILER
+#include "ruy/profiler/instrumentation.h"
+#endif
+
+#include <misc/polymorphic_downcast.h>
+
+namespace onert
+{
+namespace exec
+{
+namespace train
+{
+
+TrainableExecutor::TrainableExecutor(
+ std::unique_ptr<compiler::train::LoweredTrainableGraph> lowered_graph,
+ backend::train::TrainableBackendContexts &&backend_contexts,
+ const compiler::train::TensorRegistries &tensor_regs,
+ compiler::train::TrainableCodeMap &&code_map, const std::vector<ir::OperationIndex> &order,
+ const util::TracingCtx *tracing_ctx)
+ : _lowered_graph{std::move(lowered_graph)}, _backend_contexts{std::move(backend_contexts)},
+ _trainable_graph{_lowered_graph->trainable_graph()}, _tensor_regs{std::move(tensor_regs)},
+ _mutex(), _tracing_ctx(tracing_ctx)
+{
+ auto build_tensor_list = [&](const auto &ind_seq, auto &tensors) {
+ assert(tensors.empty());
+ for (auto &&ind : ind_seq)
+ {
+ backend::ITensor *tensor = tensor_regs.getITensor(ind);
+ assert(tensor != nullptr);
+ auto io_tensor = nnfw::misc::polymorphic_downcast<backend::builtin::IOTensor *>(tensor);
+ tensors.push_back(io_tensor);
+ }
+ };
+ build_tensor_list(_trainable_graph.getInputs(), _input_tensors);
+ build_tensor_list(_trainable_graph.getOutputs(), _output_tensors);
+
+ for (auto &&index : order)
+ {
+ auto &trainable_code = code_map.at(index);
+ _code.emplace_back(std::move(trainable_code));
+ }
+}
+
+void TrainableExecutor::execute(const std::vector<backend::IPortableTensor *> &,
+ const std::vector<backend::IPortableTensor *> &)
+{
+ throw std::runtime_error("TrainableExecutor does not support multiple subgraphs yet");
+}
+
+void TrainableExecutor::forward(const IODescription &desc, bool training)
+{
+  // Use a mutex for thread safety
+  // TODO: If all backends used by this executor are thread-safe,
+  //       the mutex is unnecessary (otherwise, keep using it)
+ std::lock_guard<std::mutex> lock(_mutex);
+
+ // TODO Update IO tensors if desc has dynamic input
+ // Set input(s)
+ assert(_input_tensors.size() == desc.inputs.size());
+ for (uint32_t i = 0; i < _input_tensors.size(); ++i)
+ {
+ auto tensor = _input_tensors[i];
+
+ // TODO Check if (desc.inputs[i] == nullptr)
+ // TODO Better design for ITensor? (we need const_cast as ITensor is writable)
+ tensor->setUserTensor(static_cast<uint8_t *>(const_cast<void *>(desc.inputs[i]->buffer)),
+ desc.inputs[i]->size);
+ }
+
+ if (!training)
+ {
+ // Set output(s)
+ assert(_output_tensors.size() == desc.outputs.size());
+ for (uint32_t i = 0; i < _output_tensors.size(); ++i)
+ {
+ auto tensor = _output_tensors[i];
+
+ if (desc.outputs[i] == nullptr)
+ throw std::runtime_error{"Output " + std::to_string(i) + "'s buffer is not set."};
+ tensor->setUserTensor(static_cast<uint8_t *>(desc.outputs[i]->buffer), desc.outputs[i]->size);
+ }
+ }
+
+ forwardImpl(training);
+
+ // TODO Update output(s) desc if desc has dynamic input
+}
+
+void TrainableExecutor::forwardImpl(bool training)
+{
+ if (_tracing_ctx)
+ {
+ auto profiling_subg_index = _tracing_ctx->getSubgraphIndex(&_trainable_graph.graph());
+
+ _subject.notifySubgraphBegin(profiling_subg_index);
+ for (auto &&code : _code)
+ {
+ const auto backend = code.lower_info->backend();
+// TODO : Move ruy profiler into ExecutionObserver
+#ifdef RUY_PROFILER
+ ruy::profiler::ScopeLabel label(code.op->name());
+#endif
+ _subject.notifyJobBegin(this, profiling_subg_index, code.op_ind, backend);
+
+ auto &tn_seq = code.tn_seq;
+ tn_seq->forward(training);
+
+ _subject.notifyJobEnd(this, profiling_subg_index, code.op_ind, backend);
+ }
+ _subject.notifySubgraphEnd(profiling_subg_index);
+ }
+ else
+ {
+ for (auto &&code : _code)
+ {
+// TODO : Move ruy profiler into ExecutionObserver
+#ifdef RUY_PROFILER
+ ruy::profiler::ScopeLabel label(code.op->name());
+#endif
+ auto &tn_seq = code.tn_seq;
+ tn_seq->forward(training);
+ }
+ }
+}
+
+void TrainableExecutor::backward(const IODescription &, uint32_t training_step)
+{
+  // Use a mutex for thread safety
+  // TODO: If all backends used by this executor are thread-safe,
+  //       the mutex is unnecessary (otherwise, keep using it)
+ std::lock_guard<std::mutex> lock(_mutex);
+
+ backwardImpl(training_step);
+}
+
+void TrainableExecutor::backwardImpl(uint32_t training_step)
+{
+ if (_tracing_ctx)
+ {
+ auto profiling_subg_index = _tracing_ctx->getSubgraphIndex(&_trainable_graph.graph());
+
+ _subject.notifySubgraphBegin(profiling_subg_index);
+ for (auto it = _code.rbegin(); it != _code.rend(); ++it)
+ {
+ const auto &code = *it;
+ const auto backend = code.lower_info->backend();
+// TODO : Move ruy profiler into ExecutionObserver
+#ifdef RUY_PROFILER
+ ruy::profiler::ScopeLabel label(code.op->name());
+#endif
+ _subject.notifyJobBegin(this, profiling_subg_index, code.op_ind, backend);
+
+ auto &tn_seq = code.tn_seq;
+ tn_seq->backward(training_step);
+
+ _subject.notifyJobEnd(this, profiling_subg_index, code.op_ind, backend);
+ }
+ _subject.notifySubgraphEnd(profiling_subg_index);
+ }
+ else
+ {
+ for (auto it = _code.rbegin(); it != _code.rend(); ++it)
+ {
+ const auto &code = *it;
+// TODO : Move ruy profiler into ExecutionObserver
+#ifdef RUY_PROFILER
+ ruy::profiler::ScopeLabel label(code.op->name());
+#endif
+ auto &tn_seq = code.tn_seq;
+ tn_seq->backward(training_step);
+ }
+ }
+}
+
+float TrainableExecutor::getLoss(const ir::IOIndex &pred_io_ind) const
+{
+ const auto &loss_ind = _trainable_graph.getLossIndex(pred_io_ind);
+ if (loss_ind.undefined())
+ throw std::runtime_error{"Loss " + std::to_string(loss_ind.value()) + " is not defined."};
+ backend::ITensor *tensor = _tensor_regs.getITensor(loss_ind);
+ auto loss_buf = reinterpret_cast<float *>(tensor->buffer());
+ return *loss_buf;
+}
+
+} // namespace train
+} // namespace exec
+} // namespace onert
diff --git a/runtime/onert/core/src/exec/train/TrainableExecutor.h b/runtime/onert/core/src/exec/train/TrainableExecutor.h
new file mode 100644
index 000000000..6b645305f
--- /dev/null
+++ b/runtime/onert/core/src/exec/train/TrainableExecutor.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_TRAIN_TRAINABLE_EXECUTOR_H_
+#define __ONERT_EXEC_TRAIN_TRAINABLE_EXECUTOR_H_
+
+#include "exec/IExecutor.h"
+
+#include "../ExecutionObservee.h"
+#include "../../compiler/train/TensorRegistries.h"
+
+#include "backend/train/TrainableBackendContext.h"
+#include "compiler/train/TrainableCodeMap.h"
+#include "compiler/train/LoweredTrainableGraph.h"
+#include "ir/Index.h"
+#include "util/TracingCtx.h"
+
+namespace onert
+{
+namespace exec
+{
+namespace train
+{
+
+class TrainableExecutor : public IExecutor
+{
+public:
+ /**
+ * @brief Construct a new TrainableExecutor object
+ * @param lowered_graph LoweredTrainableGraph object
+ * @param tensor_builders Tensor builders that are currently used
+ * @param code_map @c ir::Operation and its code map
+ */
+ TrainableExecutor(std::unique_ptr<compiler::train::LoweredTrainableGraph> lowered_graph,
+ backend::train::TrainableBackendContexts &&backend_contexts,
+ const compiler::train::TensorRegistries &tensor_regs,
+ compiler::train::TrainableCodeMap &&code_map,
+ const std::vector<ir::OperationIndex> &order,
+ const util::TracingCtx *tracing_ctx);
+
+public:
+ const ir::Graph &graph() const final { return _trainable_graph.graph(); }
+
+ void execute(const IODescription &desc) override { forward(desc, false); };
+
+ void execute(const std::vector<backend::IPortableTensor *> &inputs,
+ const std::vector<backend::IPortableTensor *> &outputs) override;
+
+ void forward(const IODescription &desc, bool training);
+ void backward(const IODescription &desc, uint32_t training_step);
+
+ // Used only in Dataflow and Parallel Executors
+ void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>> ranks) final
+ {
+ _indexed_ranks = std::move(ranks);
+ };
+
+ void addObserver(std::unique_ptr<IExecutionObserver> ref) { _subject.add(std::move(ref)); };
+
+ const std::vector<backend::builtin::IOTensor *> &getInputTensors() const override
+ {
+ return _input_tensors;
+ }
+
+ const std::vector<backend::builtin::IOTensor *> &getOutputTensors() const override
+ {
+ return _output_tensors;
+ }
+
+ float getLoss(const ir::IOIndex &pred_io_ind) const;
+
+ backend::train::TrainableBackendContexts &getBackendContexts() { return _backend_contexts; }
+
+private:
+ void forwardImpl(bool training);
+ void backwardImpl(uint32_t training_step);
+
+private:
+ std::vector<compiler::train::TrainableCodeAndInfo> _code;
+ ExecutionObservee _subject;
+ std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
+ std::unique_ptr<compiler::train::LoweredTrainableGraph> _lowered_graph;
+ backend::train::TrainableBackendContexts _backend_contexts;
+ const ir::train::TrainableGraph &_trainable_graph;
+ compiler::train::TensorRegistries _tensor_regs;
+ std::vector<backend::builtin::IOTensor *> _input_tensors;
+ std::vector<backend::builtin::IOTensor *> _output_tensors;
+ std::mutex _mutex;
+ const util::TracingCtx *_tracing_ctx;
+};
+
+} // namespace train
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_TRAIN_TRAINABLE_EXECUTOR_H_
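A training iteration with this executor is a forward pass with training enabled followed by a backward pass for the current step, which is exactly what TrainableExecutors::train() below wires together. A hedged usage sketch; the include path is assumed and the executor and IODescription are prepared elsewhere:

#include <cstdint>
#include "exec/train/TrainableExecutor.h" // include path assumed for illustration

// Illustrative training loop; `exec` and `desc` are assumed to be fully set up by the caller.
void train_n_steps(onert::exec::train::TrainableExecutor &exec,
                   const onert::exec::IODescription &desc, uint32_t num_steps)
{
  for (uint32_t step = 0; step < num_steps; ++step)
  {
    exec.forward(desc, /*training=*/true); // forward pass keeping state needed for backprop
    exec.backward(desc, step);             // backward pass / gradient update for this step
  }
  // exec.getLoss(onert::ir::IOIndex{0}) would return the loss of the first prediction output.
}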
diff --git a/runtime/onert/core/src/exec/train/TrainableExecutors.cc b/runtime/onert/core/src/exec/train/TrainableExecutors.cc
new file mode 100644
index 000000000..ba39bf0f0
--- /dev/null
+++ b/runtime/onert/core/src/exec/train/TrainableExecutors.cc
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TrainableExecutors.h"
+
+#include "../../backend/builtin/IOTensor.h"
+
+#include <misc/polymorphic_downcast.h>
+
+namespace onert
+{
+namespace exec
+{
+namespace train
+{
+
+void TrainableExecutors::emplace(const ir::ModelIndex &, const ir::SubgraphIndex &subg_index,
+ std::unique_ptr<IExecutor> exec)
+{
+ std::unique_ptr<TrainableExecutor> t_exec{
+ nnfw::misc::polymorphic_downcast<TrainableExecutor *>(exec.release())};
+ _executors.emplace(subg_index, std::move(t_exec));
+}
+
+TrainableExecutor *TrainableExecutors::at(const ir::ModelIndex &,
+ const ir::SubgraphIndex &subg_index) const
+{
+ return _executors.at(subg_index).get();
+}
+
+uint32_t TrainableExecutors::inputSize() const { return entryExecutor()->getInputTensors().size(); }
+
+uint32_t TrainableExecutors::outputSize() const
+{
+ return entryExecutor()->getOutputTensors().size();
+}
+
+const ir::OperandInfo &TrainableExecutors::inputInfo(const ir::IOIndex &index) const
+{
+ return entryExecutor()->getInputTensors().at(index.value())->orig_info();
+}
+
+const ir::OperandInfo &TrainableExecutors::outputInfo(const ir::IOIndex &index) const
+{
+ return entryExecutor()->getOutputTensors().at(index.value())->orig_info();
+}
+
+void TrainableExecutors::execute(const IODescription &desc)
+{
+ if (_executors.size() > 1)
+ throw std::runtime_error("TrainableExecutors does not support multiple executors yet");
+ entryExecutor()->forward(desc, false);
+
+  // TODO Support multiple executors
+}
+
+void TrainableExecutors::train(const IODescription &desc, uint32_t training_step)
+{
+ if (_executors.size() > 1)
+ throw std::runtime_error("TrainableExecutors does not support multiple executors yet");
+ entryExecutor()->forward(desc, true);
+ entryExecutor()->backward(desc, training_step);
+
+  // TODO Support multiple executors
+}
+
+float TrainableExecutors::getLoss(const ir::IOIndex &index) const
+{
+ if (_executors.size() > 1)
+ throw std::runtime_error("TrainableExecutors does not support multiple executors yet");
+ return entryExecutor()->getLoss(index);
+}
+
+} // namespace train
+} // namespace exec
+} // namespace onert
diff --git a/runtime/onert/core/src/exec/train/TrainableExecutors.h b/runtime/onert/core/src/exec/train/TrainableExecutors.h
new file mode 100644
index 000000000..db6d198b1
--- /dev/null
+++ b/runtime/onert/core/src/exec/train/TrainableExecutors.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_TRAIN_TRAINABLE_EXECUTORS_H__
+#define __ONERT_EXEC_TRAIN_TRAINABLE_EXECUTORS_H__
+
+#include "TrainableExecutor.h"
+#include "exec/IExecutors.h"
+#include "ir/NNPkg.h"
+
+namespace onert
+{
+namespace exec
+{
+namespace train
+{
+
+/**
+ * @brief Class to gather the executor set for a trainable model NN package
+ */
+class TrainableExecutors : public IExecutors
+{
+public:
+ /**
+ * @brief Construct a new TrainableExecutors object
+ */
+ TrainableExecutors(void) = default;
+ TrainableExecutors(const TrainableExecutors &) = delete;
+ TrainableExecutors(TrainableExecutors &&) = default;
+
+ /**
+ * @brief Destroy the TrainableExecutors object
+ */
+ ~TrainableExecutors() = default;
+
+public:
+ TrainableExecutors &operator=(const TrainableExecutors &) = delete;
+ TrainableExecutors &operator=(TrainableExecutors &&) = default;
+
+public:
+ void emplace(const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+ std::unique_ptr<IExecutor> exec) override;
+
+ TrainableExecutor *at(const ir::ModelIndex &model_index,
+ const ir::SubgraphIndex &subg_index) const override;
+
+ TrainableExecutor *entryExecutor() const { return at(ir::ModelIndex{0}, ir::SubgraphIndex{0}); }
+
+ uint32_t inputSize() const override;
+
+ uint32_t outputSize() const override;
+
+ const ir::OperandInfo &inputInfo(const ir::IOIndex &index) const override;
+
+ const ir::OperandInfo &outputInfo(const ir::IOIndex &index) const override;
+
+ void execute(const IODescription &desc) override;
+
+ /**
+   * @brief Run a single training step (forward pass followed by backward pass)
+ *
+ * @param desc IO information
+   * @param training_step The number of iterations of a training process.
+   *                      In other words, the number of gradient updates.
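+   *
+   * A minimal usage sketch (illustrative only; compiling the trainable model and
+   * filling the IODescription with user buffers are omitted, and `executors`,
+   * `desc`, and `num_steps` are placeholder names):
+   * @code
+   *   exec::IODescription desc;
+   *   // ... point desc.inputs / desc.outputs at user-provided buffers ...
+   *   for (uint32_t step = 0; step < num_steps; ++step)
+   *     executors.train(desc, step);
+   *   float loss = executors.getLoss(ir::IOIndex{0});
+   * @endcode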
+ */
+ void train(const IODescription &desc, uint32_t training_step);
+
+ float getLoss(const ir::IOIndex &index) const;
+
+private:
+  // TODO Key executors by the model index as well, not only by the subgraph index
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<TrainableExecutor>> _executors;
+};
+
+} // namespace train
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_TRAIN_TRAINABLE_EXECUTORS_H__
diff --git a/runtime/onert/core/src/exec/train/TrainableFnSequence.cc b/runtime/onert/core/src/exec/train/TrainableFnSequence.cc
new file mode 100644
index 000000000..084b3d708
--- /dev/null
+++ b/runtime/onert/core/src/exec/train/TrainableFnSequence.cc
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "exec/train/TrainableFnSequence.h"
+
+namespace onert
+{
+namespace exec
+{
+namespace train
+{
+
+void TrainableFnSequence::forward(bool training)
+{
+ for (const auto &function : _functions)
+ {
+ function->forward(training);
+ }
+}
+
+void TrainableFnSequence::backward(uint32_t training_step)
+{
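+  // Propagate gradients by running the trainable functions in reverse order,
+  // then let each registered applier update its parameters for this step.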
+ for (auto it = _functions.rbegin(); it != _functions.rend(); ++it)
+ {
+ (*it)->backward();
+ }
+
+ for (const auto &applier : _appliers)
+ {
+ applier->applyGradient(training_step);
+ }
+}
+
+void TrainableFnSequence::append(std::unique_ptr<ITrainableFunction> &&function)
+{
+ _functions.push_back(std::move(function));
+}
+
+void TrainableFnSequence::append(std::unique_ptr<IGradientApplier> &&applier)
+{
+ _appliers.push_back(std::move(applier));
+}
+
+void TrainableFnSequence::iterate(const std::function<void(ITrainableFunction &)> &fn)
+{
+ for (const auto &func : _functions)
+ {
+ fn(*func);
+ }
+}
+
+} // namespace train
+} // namespace exec
+} // namespace onert
diff --git a/runtime/onert/core/src/exec/train/optimizer/OptimizerCode.cc b/runtime/onert/core/src/exec/train/optimizer/OptimizerCode.cc
new file mode 100644
index 000000000..72b581bf6
--- /dev/null
+++ b/runtime/onert/core/src/exec/train/optimizer/OptimizerCode.cc
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "exec/train/optimizer/OptimizerCode.h"
+
+#include <unordered_map>
+
+namespace onert
+{
+namespace exec
+{
+namespace train
+{
+namespace optimizer
+{
+
+std::string toString(OptimizerCode code)
+{
+ static const std::unordered_map<OptimizerCode, const char *> map{
+ {OptimizerCode::Invalid, "Invalid"},
+ {OptimizerCode::SGD, "SGD"},
+ {OptimizerCode::Adam, "Adam"}};
+ return map.at(code);
+}
+
+} // namespace optimizer
+} // namespace train
+} // namespace exec
+} // namespace onert
diff --git a/runtime/onert/core/src/exec/train/optimizer/OptimizerHelpers.h b/runtime/onert/core/src/exec/train/optimizer/OptimizerHelpers.h
new file mode 100644
index 000000000..66a08b50f
--- /dev/null
+++ b/runtime/onert/core/src/exec/train/optimizer/OptimizerHelpers.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_TRAIN_OPTIMIZER_OPTIMIZER_HELPERS_H__
+#define __ONERT_EXEC_TRAIN_OPTIMIZER_OPTIMIZER_HELPERS_H__
+
+#include "backend/IPortableTensor.h"
+
+namespace onert
+{
+namespace exec
+{
+namespace train
+{
+namespace optimizer
+{
+
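+// Applies the binary functor `f` element by element over `shape`: for each coordinate it
+// reads a value of type T from `src`, combines it with the current value in `dst`, and
+// stores the result back into `dst`.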
+template <typename T, typename L>
+void elementwise(const ir::Shape &shape, const backend::ITensor &src, backend::ITensor &dst,
+ const L &f)
+{
+ ShapeLoop(shape, [&](const ir::Coordinates &coords) {
+ const T src_val = *reinterpret_cast<const T *>(src.buffer() + src.calcOffset(coords));
+ T *dst_data = reinterpret_cast<T *>(dst.buffer() + dst.calcOffset(coords));
+ *dst_data = f(src_val, *dst_data);
+ });
+}
+
+} // namespace optimizer
+} // namespace train
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_TRAIN_OPTIMIZER_OPTIMIZER_HELPERS_H__
diff --git a/runtime/onert/core/src/exec/train/optimizer/SGD.cc b/runtime/onert/core/src/exec/train/optimizer/SGD.cc
new file mode 100644
index 000000000..abfbc1b4b
--- /dev/null
+++ b/runtime/onert/core/src/exec/train/optimizer/SGD.cc
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <exec/train/optimizer/SGD.h>
+
+#include "OptimizerHelpers.h"
+
+namespace onert
+{
+namespace exec
+{
+namespace train
+{
+namespace optimizer
+{
+
+double SGD::getLearningRate(uint32_t) const
+{
+ // TODO Use iteration, momentum, and nesterov
+ return _learning_rate;
+}
+
+void SGD::applyGradient(const UpdateFactors &factors) const
+{
+ const auto lr = getLearningRate(std::get<size_t>(factors));
+ const auto &grad_tensor = std::get<const backend::IPortableTensor &>(factors);
+ auto &trainable_tensor = std::get<backend::train::ITrainableTensor &>(factors);
+ assert(trainable_tensor.data_type() == grad_tensor.data_type());
+
+ const auto shape = trainable_tensor.getShape();
+ const auto &grad_shape = grad_tensor.get_info().shape();
+
+  // TODO Support gradient tensors whose shape differs from the trainable tensor
+ if (shape != grad_shape)
+ {
+ throw std::runtime_error("SGD: Invalid gradient tensor");
+ }
+
+ switch (grad_tensor.data_type())
+ {
+ case ir::DataType::FLOAT32:
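+      // Plain SGD update: trainable weight (dst) <- dst - lr * gradient (src)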
+ elementwise<float>(shape, grad_tensor, trainable_tensor,
+ [&](float src, float dst) -> float { return dst - src * lr; });
+ break;
+ default:
+ throw std::runtime_error("SGD: Not supported data type");
+ }
+}
+
+} // namespace optimizer
+} // namespace train
+} // namespace exec
+} // namespace onert
diff --git a/runtime/onert/core/src/interp/Buffer.h b/runtime/onert/core/src/interp/Buffer.h
deleted file mode 100644
index 24938f74f..000000000
--- a/runtime/onert/core/src/interp/Buffer.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Buffer.h
- * @brief This file contains Buffer interface and InternalBuffer, ExternalBuffer class
- */
-#ifndef __ONERT_INTERP_BUFFER_H__
-#define __ONERT_INTERP_BUFFER_H__
-
-#include <memory>
-
-#include "ir/Data.h"
-
-namespace onert
-{
-namespace interp
-{
-
-/**
- * @brief Interface for writable data area
- */
-class Buffer : public ir::Data
-{
-public:
- /**
- * @brief Return writable pointer for data area
- * @return Writable pointer
- */
- virtual uint8_t *baseWritable(void) const = 0;
-};
-
-/**
- * @brief Class for internally allocated data area
- */
-class InternalBuffer final : public Buffer
-{
-public:
- InternalBuffer(size_t size) : _base{std::make_unique<uint8_t[]>(size)}, _size{size}
- {
- // DO NOTHING
- }
-
-public:
- size_t size(void) const override { return _size; }
- const uint8_t *base(void) const override { return _base.get(); }
- uint8_t *baseWritable(void) const override { return _base.get(); }
-
-private:
- std::unique_ptr<uint8_t[]> _base;
- size_t _size;
-};
-
-/**
- * @brief Class for data area from outside
- */
-class ExternalBuffer final : public Buffer
-{
-public:
- ExternalBuffer(uint8_t *base, size_t size) : _base{base}, _size{size}
- {
- // DO NOTHING
- }
-
-public:
- size_t size(void) const override { return _size; }
- const uint8_t *base(void) const override { return _base; }
- uint8_t *baseWritable(void) const override { return _base; }
-
-private:
- uint8_t *_base;
- size_t _size;
-};
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_BUFFER_H__
diff --git a/runtime/onert/core/src/interp/ExecEnv.h b/runtime/onert/core/src/interp/ExecEnv.h
deleted file mode 100644
index 7f577ea6e..000000000
--- a/runtime/onert/core/src/interp/ExecEnv.h
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file ExecEnv.h
- * @brief This file contains ExecEnv to access interpreter tensor and execution status
- */
-#ifndef __ONERT_INTERP_EXEC_ENV_H_
-#define __ONERT_INTERP_EXEC_ENV_H_
-
-#include <unordered_set>
-
-#include "ir/Graph.h"
-#include "Tensor.h"
-
-namespace onert
-{
-namespace interp
-{
-
-/**
- * @brief Class to gather interpreter execution environment
- * Each interpreter instance own execution environment
- */
-class ExecEnv
-{
-public:
- /**
- * @brief Construct a new Exec Env object (deleted)
- */
- ExecEnv(void) = delete;
- /**
- * @brief Construct a new ExecEnv object
- * @param[in] graph Graph to execute by interpreter
- */
- explicit ExecEnv(const ir::Graph &graph) : _graph(graph)
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Return graph to execute
- * @return Graph
- */
- const ir::Graph &graph(void) const { return _graph; }
- /**
- * @brief Assign tensor to environment which have allocated or assigned buffer
- * @param[in] index Tensor index
- * @param[in] tensor Tensor
- */
- void assignTensor(const ir::OperandIndex index, std::shared_ptr<ITensor> tensor)
- {
- assert(tensor->bufferRO() != nullptr);
- _tensors.emplace(index, tensor);
- }
-
- /**
- * @brief Return tensor pointer in environment
- * @param[in] index Tensor index
- * can_optional @c True if tensor can be optional input, otherwise @c false
- * @return Tensor pointer
- */
- const ITensor *tensorAt(const ir::OperandIndex index, bool can_optional = false) const
- {
- if (_tensors.find(index) == _tensors.end())
- {
- // It may optional input,
- // otherwise input is not set by runtime user
- if (can_optional)
- {
- return nullptr;
- }
-
- throw std::runtime_error{"ExecEnv: Input is not set"};
- }
-
- return _tensors.at(index).get();
- }
-
- /**
- * @brief Check environment contains tensor
- * @param[in] index Tensor index
- * @return @c true if environment contain tensor, otherwise @c false
- */
- bool contains(const ir::OperandIndex index) const
- {
- return (_tensors.find(index) != _tensors.end());
- }
-
- /**
- * @brief Allocate tensor using operand info
- * @param[in] index Tensor index
- * @param[in] info Operand info
- * @note If already allocated, just return
- * @TODO More smart allocation policy
- */
- void allocateIfNeeded(const ir::OperandIndex index, const ir::OperandInfo &info)
- {
- // already allocated, or constant
- if (contains(index))
- {
- return;
- }
-
- // Buffer from external (ex. model output)
- auto tensor = std::make_shared<Tensor>(info);
- if (isExtBuffer(index))
- {
- tensor->setBuffer(_external_buffers.at(index));
- assignTensor(index, tensor);
-
- return;
- }
-
- tensor->setBuffer(std::make_shared<InternalBuffer>(tensor->total_size()));
- assignTensor(index, tensor);
- _buffers.insert(index);
- }
-
- /**
- * @brief Allocate read-only tensor and share data with other tensor
- * @param[in] index Tensor index
- * @param[in] info Operand info
- * @param[in] index_to_share Tensor index that have data to share
- */
- void allocateAndShareIfNeeded(const ir::OperandIndex index, const ir::OperandInfo &info,
- const ir::OperandIndex index_to_share)
- {
- if (!contains(index_to_share))
- {
- throw std::runtime_error{"Cannot find tensor to share data"};
- }
-
- // already allocated
- if (contains(index))
- {
- return;
- }
-
- if (isExtBuffer(index))
- {
- auto tensor = std::make_shared<Tensor>(info);
- tensor->setBuffer(_external_buffers.at(index));
- assignTensor(index, tensor);
- }
- else
- {
- auto tensor = std::make_shared<ROTensor>(info);
- tensor->setData(tensorAt(index_to_share)->shareData());
- assignTensor(index, tensor);
- _buffers.insert(index);
- }
- }
-
- /**
- * @brief Free buffer if allocated by allocateIfNeed
- * @param[in] index Tensor index
- * @note If allocated by outside, just return
- */
- void freeIfAllocated(const ir::OperandIndex index)
- {
- if (_buffers.find(index) != _buffers.end())
- {
- _tensors.at(index)->releaseData();
- }
- }
-
- /**
- * @brief Assign ExternalBuffer into external buffer map
- * @param[in] index Tensor index
- * @param[in] buffer External buffer
- */
- void assignExternalBuffer(const ir::OperandIndex index, std::shared_ptr<ExternalBuffer> buffer)
- {
- _external_buffers.emplace(index, buffer);
- }
-
-private:
- bool isExtBuffer(const ir::OperandIndex index)
- {
- return (_external_buffers.find(index) != _external_buffers.end());
- }
-
-private:
- const ir::Graph &_graph;
- // Tensor map to use in interpreter
- // It should map tensors that have allocated or assigned buffer pointer
- std::unordered_map<ir::OperandIndex, std::shared_ptr<ITensor>> _tensors;
- // Tensors allocated by allocateIfNeed (buffer)
- std::unordered_set<ir::OperandIndex> _buffers;
- // Tensor buffer from external
- std::unordered_map<ir::OperandIndex, std::shared_ptr<ExternalBuffer>> _external_buffers;
-};
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_EXEC_ENV_H_
diff --git a/runtime/onert/core/src/interp/InterpExecutor.cc b/runtime/onert/core/src/interp/InterpExecutor.cc
deleted file mode 100644
index cd31a4dca..000000000
--- a/runtime/onert/core/src/interp/InterpExecutor.cc
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "interp/InterpExecutor.h"
-#include "interp/ExecEnv.h"
-#include "interp/Interpreter.h"
-
-#include "util/logging.h"
-
-#include <memory>
-
-namespace onert
-{
-namespace interp
-{
-
-void InterpExecutor::execute(const exec::IODescription &desc)
-{
- /************************************************************************
- * Prepare execution model (submodel)
- It may execute divided model
- but now consider model inference is done at interpreter
- ***********************************************************************/
- ir::OperandIndexMap<std::shared_ptr<ITensor>> tensor_map;
-
- for (uint32_t n = 0; n < _graph.getInputs().size(); n++)
- {
- ir::IOIndex index{n};
- const auto input_index = _graph.getInputs().at(index);
-
- const auto input = desc.inputs.at(n).get();
- if (input == nullptr)
- {
- // Optional input
- continue;
- }
-
- auto input_tensor = std::make_shared<ROTensor>(input->info);
- input_tensor->setData(std::make_shared<const ir::ExternalData>(
- reinterpret_cast<const uint8_t *>(input->buffer), input->size));
- tensor_map[input_index] = input_tensor;
- }
-
- /************************************************************************
- * Prepare execution environment
- Execution environment will be assigned to invoked interpreter instance
- ***********************************************************************/
-
- std::unique_ptr<ExecEnv> interp_env = std::make_unique<ExecEnv>(_graph);
-
- // Assign input/output tensor into interpreter execution environment
- for (auto index : _graph.getInputs())
- {
- if (tensor_map.find(index) != tensor_map.end())
- {
- VERBOSE(INTERPRETER) << "Assign input tensor. operand index:" << index.value() << std::endl;
- interp_env->assignTensor(index, tensor_map.at(index));
- }
- }
-
- for (uint32_t n = 0; n < _graph.getOutputs().size(); n++)
- {
- ir::IOIndex index{n};
- const auto output_index = _graph.getOutputs().at(index);
- const auto output = desc.outputs.at(n).get();
- if (output == nullptr)
- {
- // Optional output
- continue;
- }
-
- VERBOSE(INTERPRETER) << "Set out buffer to ExecEnv. operand index:" << output_index.value()
- << std::endl;
-
- interp_env->assignExternalBuffer(
- output_index, std::make_shared<ExternalBuffer>(reinterpret_cast<uint8_t *>(output->buffer),
- output->size));
- }
-
- // Allocate constant tensor
- _graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
- if (obj.isConstant())
- {
- VERBOSE(INTERPRETER) << "Allocate and assign constant tensor. operand index:" << ind.value()
- << std::endl;
-
- assert(obj.data());
- auto const_tensor = std::make_shared<ROTensor>(obj.info());
- // Assume that interpreter's tensor layout is same with model (NHWC)
- const_tensor->setData(
- std::make_shared<ir::ExternalData>(obj.data()->base(), obj.info().total_size()));
- interp_env->assignTensor(ind, const_tensor);
- }
- });
-
- /*****************************************************************************
- * Invoke interpreter
- ****************************************************************************/
-
- interp::Interpreter interp(std::move(interp_env));
- interp.run();
-
- /*****************************************************************************
- * Invoked interpreter run is finished
- ****************************************************************************/
-
- // If interpreter execute submodel
- // 1. Get tensor output of submodel into tensor_map to save result
- // 2. Generate new ExecEnv for next interpretation
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/InterpExecutor.h b/runtime/onert/core/src/interp/InterpExecutor.h
deleted file mode 100644
index 2e3f3ca54..000000000
--- a/runtime/onert/core/src/interp/InterpExecutor.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file InterpExecutor.h
- * @brief This file contains InterpExecutor class\n
- * to manage interpreter execution and environment
- */
-#ifndef __ONERT_INTERP_INTERP_EXECUTOR_H__
-#define __ONERT_INTERP_INTERP_EXECUTOR_H__
-
-#include "ir/OperandIndexMap.h"
-#include "ir/Graph.h"
-#include "exec/IExecutor.h"
-
-namespace onert
-{
-namespace interp
-{
-
-class ITensor;
-
-/**
- * @brief Class to execute model using interpreter
- */
-class InterpExecutor final : public exec::IExecutor
-{
-public:
- explicit InterpExecutor(const ir::Graph &graph) : _graph(graph)
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Return graph object
- * @return Graph object
- */
- const ir::Graph &graph() final { return _graph; }
- void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>>) override{
- // Not implemented
- };
- /**
- * @brief Start execution
- * @note It should be called after setting input and output buffer
- */
- void execute(const exec::IODescription &desc) final;
-
-private:
- const ir::Graph &_graph;
- ir::OperandIndexMap<std::shared_ptr<ITensor>> _tensor_map;
-};
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_INTERP_EXECUTOR_H__
diff --git a/runtime/onert/core/src/interp/InterpOps.lst b/runtime/onert/core/src/interp/InterpOps.lst
deleted file mode 100644
index 0714df38a..000000000
--- a/runtime/onert/core/src/interp/InterpOps.lst
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef INTERP_OP
-#error Define INTERP_OP before including this file
-#endif
-
-// Supported operation name in interpreter
-//
-// Same list with Operations.lst
-// Make comment out if operation is not supported in interpreter
-INTERP_OP(BinaryArithmetic)
-//INTERP_OP(BatchToSpaceND)
-//INTERP_OP(Cast)
-INTERP_OP(Conv2D)
-INTERP_OP(DepthwiseConv2D)
-INTERP_OP(Pool2D)
-INTERP_OP(Concat)
-INTERP_OP(FullyConnected)
-//INTERP_OP(Reduce)
-INTERP_OP(Reshape)
-INTERP_OP(Softmax)
-//INTERP_OP(Squeeze)
-//INTERP_OP(Slice)
-//INTERP_OP(StridedSlice)
-INTERP_OP(ElementwiseActivation)
-//INTERP_OP(Transpose)
-//INTERP_OP(Exp)
-//INTERP_OP(Comparison)
-//INTERP_OP(LogicalNot)
-//INTERP_OP(LSTM)
-//INTERP_OP(RSQRT)
-//INTERP_OP(ResizeBilinear)
-//INTERP_OP(RNN)
-//INTERP_OP(Floor)
-//INTERP_OP(SpaceToBatchND)
-//INTERP_OP(SpaceToDepth)
-//INTERP_OP(EmbeddingLookup)
-//INTERP_OP(L2Normalization)
-//INTERP_OP(HashtableLookup)
-INTERP_OP(InstanceNorm)
-//INTERP_OP(PReLU)
-INTERP_OP(TransposeConv)
-//INTERP_OP(SQRT)
-//INTERP_OP(SquaredDifference)
-//INTERP_OP(TopKV2)
-INTERP_OP(Gather)
-//INTERP_OP(Neg)
-//INTERP_OP(Abs)
-//INTERP_OP(ArgMax)
-//INTERP_OP(Dequantize)
-//INTERP_OP(LocalResponseNormalization)
-//INTERP_OP(DepthToSpace)
-//INTERP_OP(Pack)
-//INTERP_OP(Split)
-//INTERP_OP(Unpack)
-INTERP_OP(Pad)
-//INTERP_OP(Custom)
-//INTERP_OP(Permute)
-//INTERP_OP(OneHot)
diff --git a/runtime/onert/core/src/interp/Interpreter.cc b/runtime/onert/core/src/interp/Interpreter.cc
deleted file mode 100644
index b92afbe73..000000000
--- a/runtime/onert/core/src/interp/Interpreter.cc
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Interpreter.h"
-
-#include <stack>
-#include <unordered_set>
-
-#include "Registration.h"
-
-#include "ir/OperandIndexMap.h"
-#include "util/logging.h"
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace interp
-{
-
-// TODO more structured execution kernel implementation
-// TODO use cker for execution
-// TODO divide tensor prepare and execution
-// TODO introduce memory manager (buffer allocate and free)
-class OperationExecutor
-{
-public:
- OperationExecutor(ExecEnv *env) : _env{env}
- {
-#define INTERP_OP(InternalName) _kernels[ir::OpCode::InternalName] = get##InternalName();
-#include "InterpOps.lst"
-#undef INTERP_OP
- }
-
- void execute(const ir::OperationIndex &idx)
- {
- const ir::Operation &node = _env->graph().operations().at(idx);
- const auto nodeName = node.name();
- VERBOSE(INTERPRETER) << "Prepare output operands and execute " << nodeName
- << " operation (id: " << idx.value() << ")" << std::endl;
-
- const auto nodeOpCode = node.opcode();
- if (_kernels.find(nodeOpCode) == _kernels.end())
- {
- throw std::runtime_error{"Interpreter: Operation " + nodeName + " is not yet implemented"};
- }
-
- if (_kernels[nodeOpCode]->prepare != nullptr)
- {
- _kernels[nodeOpCode]->prepare(_env, node);
- }
- _kernels[nodeOpCode]->invoke(_env, node);
- }
-
-private:
- ExecEnv *_env;
- std::unordered_map<ir::OpCode, OpKernel *> _kernels;
-};
-
-void Interpreter::run()
-{
- VERBOSE(INTERPRETER) << "Interpreter is invoked " << std::endl;
-
- // operand_stack: save operands prepared to use
- std::stack<ir::OperandIndex> operand_stack;
-
- // Note: We should push input first, then constant.
- // We use use-def for find operators ready to execution,
- // but Use-Def cannot handle parameters (maybe constant, but not always)
- // Note: If all model inputs are constant, it may not work (depend on tensors' order).
- // But that scenario may not exist
- for (auto ind : _env->graph().getInputs())
- {
- VERBOSE(INTERPRETER) << "Input: Push to operand stack " << ind.value() << std::endl;
-
- operand_stack.push(ind);
- }
-
- _env->graph().operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
- if (obj.isConstant())
- {
- VERBOSE(INTERPRETER) << "Constant: Push to operand stack " << ind.value() << std::endl;
-
- operand_stack.push(ind);
- }
- });
-
- // Execution
- std::unordered_set<ir::OperandIndex> ready_check;
- std::unordered_set<ir::OperationIndex> executed;
- OperationExecutor executor{_env.get()};
- while (!operand_stack.empty())
- {
- const auto current_operand_index = operand_stack.top();
- operand_stack.pop();
- VERBOSE(INTERPRETER) << "Poped operand " << current_operand_index.value()
- << " is checked ready to use" << std::endl;
-
- assert(ready_check.find(current_operand_index) == ready_check.end());
- ready_check.insert(current_operand_index);
-
- // Find prepared operations by scan use of current operand
- std::stack<ir::OperationIndex> operation_stack;
- const auto use_operators = _env->graph().operands().at(current_operand_index).getUses();
- for (const auto &use_operator : use_operators)
- {
- // Assumption: all parameters are ready to use
- bool operator_ready = true;
- for (auto input_index : _env->graph().operations().at(use_operator).getInputs())
- {
- if (ready_check.find(input_index) == ready_check.end())
- {
- operator_ready = false;
- break;
- }
- }
-
- if (operator_ready)
- {
- VERBOSE(INTERPRETER) << "Ready to execute operation " << use_operator.value() << std::endl;
- operation_stack.push(use_operator);
- }
- }
-
- while (!operation_stack.empty())
- {
- const auto current_operation_index = operation_stack.top();
- operation_stack.pop();
- VERBOSE(INTERPRETER) << "Poped operation: " << current_operation_index.value() << "("
- << _env->graph().operations().at(current_operation_index).name() << ")"
- << std::endl;
-
- // execution
- // 1. Prepare output tensor
- // 2. Call operation kernel
- executor.execute(current_operation_index);
- executed.insert(current_operation_index);
-
- // 3. Push each output into operand stack
- const auto def_operands = _env->graph().operations().at(current_operation_index).getOutputs();
- for (auto def_operand : def_operands)
- {
- VERBOSE(INTERPRETER) << "Buffer: Push to operand stack " << def_operand.value()
- << std::endl;
- operand_stack.push(def_operand);
- }
-
- // 4. Free if lifetime of buffer operands used by input is finished
- for (auto input_index : _env->graph().operations().at(current_operation_index).getInputs())
- {
- const auto use_operators = _env->graph().operands().at(input_index).getUses();
- bool dead_buffer = true;
- for (const auto &use_operator : use_operators)
- {
- if (executed.find(use_operator) == executed.end())
- {
- dead_buffer = false;
- break;
- }
- }
-
- if (dead_buffer)
- {
- _env->freeIfAllocated(input_index);
- }
- }
- }
- }
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/Interpreter.h b/runtime/onert/core/src/interp/Interpreter.h
deleted file mode 100644
index d2165f538..000000000
--- a/runtime/onert/core/src/interp/Interpreter.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Interpreter.h
- * @brief This file contains Interpreter class for interpretation
- */
-#ifndef __ONERT_INTERP_INTERPRETER_H__
-#define __ONERT_INTERP_INTERPRETER_H__
-
-#include "ExecEnv.h"
-
-namespace onert
-{
-namespace interp
-{
-
-/**
- * @brief Class for interpretation
- */
-class Interpreter
-{
-
-public:
- /**
- * @brief Construct a new Interpreter object (deleted)
- */
- Interpreter() = delete;
- /**
- * @brief Construct a new Interpreter object
- * @param[in] env Execution environment variable for interpreter object
- */
- Interpreter(std::unique_ptr<ExecEnv> env) : _env{std::move(env)}
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Run interpreter until there is no operation to execute
- */
- void run();
-
-private:
- std::unique_ptr<ExecEnv> _env;
-};
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_INTERPRETER_H__
diff --git a/runtime/onert/core/src/interp/Registration.h b/runtime/onert/core/src/interp/Registration.h
deleted file mode 100644
index 956b92a53..000000000
--- a/runtime/onert/core/src/interp/Registration.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_INTERP_REGISTRATION_H__
-#define __ONERT_INTERP_REGISTRATION_H__
-
-#include "ExecEnv.h"
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace interp
-{
-
-struct OpKernel
-{
- std::function<void(ExecEnv *, const ir::Operation &)> prepare;
- std::function<void(const ExecEnv *, const ir::Operation &)> invoke;
-};
-
-// Defined in operations/ directory
-#define INTERP_OP(InternalName) OpKernel *get##InternalName();
-#include "InterpOps.lst"
-#undef INTERP_OP
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_REGISTRATION_H__
diff --git a/runtime/onert/core/src/interp/Tensor.cc b/runtime/onert/core/src/interp/Tensor.cc
deleted file mode 100644
index 07f8b75dc..000000000
--- a/runtime/onert/core/src/interp/Tensor.cc
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Tensor.h"
-
-#define NO_USE(a) (void)(a)
-
-namespace onert
-{
-namespace interp
-{
-
-void ITensor::access(const std::function<void(backend::ITensor &tensor)> &fn) { fn(*this); }
-
-size_t ROTensor::calcOffset(const ir::Coordinates &coords) const
-{
- NO_USE(coords);
- throw std::runtime_error("offset_element_in_bytes is not supported for cpu::Tensor now.");
-}
-
-size_t Tensor::calcOffset(const ir::Coordinates &coords) const
-{
- NO_USE(coords);
- throw std::runtime_error("offset_element_in_bytes is not supported for cpu::Tensor now.");
-}
-
-ir::Layout ROTensor::layout() const
-{
- // TODO Changes to return frontend layout
- return ir::Layout::NHWC;
-}
-
-ir::Layout Tensor::layout() const
-{
- // TODO Changes to return frontend layout
- return ir::Layout::NHWC;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/Tensor.h b/runtime/onert/core/src/interp/Tensor.h
deleted file mode 100644
index 008a4b9d4..000000000
--- a/runtime/onert/core/src/interp/Tensor.h
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Tensor.h
- * @brief This file contains ITensor interface, ROTensor class, and Tensor class
- */
-#ifndef __ONERT_INTERP_TENSOR_H__
-#define __ONERT_INTERP_TENSOR_H__
-
-#include "Buffer.h"
-
-#include "ir/OperandInfo.h"
-#include "backend/ITensor.h"
-#include "ir/Layout.h"
-
-namespace onert
-{
-namespace interp
-{
-
-/**
- * @brief Interface to handle Tensor in interpreter
- */
-class ITensor : public backend::ITensor
-{
-public:
- virtual ~ITensor() = default;
-
-public:
- virtual uint8_t *buffer() const = 0;
- /**
- * @brief Return shared pointer for buffer
- * @return Buffer shared pointer
- */
- virtual std::shared_ptr<const Buffer> shareBuffer() const = 0;
- /**
- * @brief Return read-only buffer pointer
- * @return Read-only buffer pointer
- */
- virtual const uint8_t *bufferRO() const = 0;
- /**
- * @brief Return shared pointer for data
- * @return Data shared pointer
- */
- virtual std::shared_ptr<const ir::Data> shareData() const = 0;
- /**
- * @brief Set internal/external buffer
- * @param[in] buffer Buffer pointer
- */
- virtual void setBuffer(std::shared_ptr<const Buffer> buffer) = 0;
- /**
- * @brief Set data reference (including constant, input)
- * @param[in] data Data pointer
- */
- virtual void setData(std::shared_ptr<const ir::Data> data) = 0;
- virtual void releaseData() = 0;
-
- virtual size_t total_size() const = 0;
- virtual size_t dimension(size_t index) const = 0;
- virtual size_t num_dimensions() const = 0;
- virtual size_t calcOffset(const ir::Coordinates &coords) const = 0;
-
- virtual bool has_padding() const = 0;
- /**
- * @brief Return data type of tensor
- * @return Data type of tensor
- */
- virtual ir::DataType data_type() const = 0;
- /**
- * @brief Return TensorInfo
- * @return TensorInfo
- */
- virtual const ir::OperandInfo &tensorInfo() const = 0;
- /**
- * @brief Return number of elements
- * @return Number of elements
- */
- virtual uint64_t num_elements() const = 0;
- void access(const std::function<void(backend::ITensor &tensor)> &fn) final;
-};
-
-/**
- * @brief Class to handle tensor in interpreter as read-only
- */
-class ROTensor final : public ITensor
-{
-public:
- ROTensor() = delete;
- ROTensor(const ir::OperandInfo &info) : _info(info)
- {
- // DO NOTHING
- }
-
-public:
- uint8_t *buffer() const override { throw std::runtime_error{"Read only tensor"}; }
- std::shared_ptr<const Buffer> shareBuffer() const override
- {
- throw std::runtime_error{"Read only tensor"};
- }
- const uint8_t *bufferRO() const override { return _data->base(); }
- std::shared_ptr<const ir::Data> shareData() const override { return _data; }
- void setBuffer(std::shared_ptr<const Buffer> buffer) override { _data = buffer; }
- void setData(std::shared_ptr<const ir::Data> data) override { _data = data; }
- void releaseData() override { _data = nullptr; }
-
- size_t total_size() const override { return _info.total_size(); }
- size_t dimension(size_t index) const override { return _info.shape().dim(index); }
- size_t num_dimensions() const override { return _info.shape().rank(); }
- size_t calcOffset(const ir::Coordinates &coords) const override;
- ir::Layout layout() const override;
- bool is_dynamic() const override { return false; }
- bool has_padding() const override { return false; }
- ir::DataType data_type() const override { return _info.typeInfo().type(); }
- float data_scale() const override { return _info.typeInfo().scale(); }
- int32_t data_offset() const override { return _info.typeInfo().offset(); }
- const ir::OperandInfo &tensorInfo() const override { return _info; }
- uint64_t num_elements() const override { return _info.shape().num_elements(); };
-
-private:
- const ir::OperandInfo _info;
- std::shared_ptr<const ir::Data> _data{nullptr};
-};
-
-/**
- * @brief Class to handle tensor in interpreter as writable
- */
-class Tensor final : public ITensor
-{
-public:
- Tensor() = delete;
- Tensor(const ir::OperandInfo &info) : _info(info)
- {
- // DO NOTHING
- }
-
-public:
- uint8_t *buffer() const override { return _buffer->baseWritable(); }
- std::shared_ptr<const Buffer> shareBuffer() const override { return _buffer; };
- const uint8_t *bufferRO() const override { return _buffer->base(); }
- std::shared_ptr<const ir::Data> shareData() const override { return _buffer; }
- void setBuffer(std::shared_ptr<const Buffer> buffer) override { _buffer = buffer; }
- void setData(std::shared_ptr<const ir::Data>) override
- {
- throw std::runtime_error{"Passed data may read-only"};
- }
- void releaseData() override { _buffer = nullptr; }
-
- size_t total_size() const override { return _info.total_size(); }
- size_t dimension(size_t index) const override { return _info.shape().dim(index); }
- size_t num_dimensions() const override { return _info.shape().rank(); }
- size_t calcOffset(const ir::Coordinates &coords) const override;
- ir::Layout layout() const override;
- bool is_dynamic() const override { return false; }
- bool has_padding() const override { return false; }
- ir::DataType data_type() const override { return _info.typeInfo().type(); }
- float data_scale() const override { return _info.typeInfo().scale(); }
- int32_t data_offset() const override { return _info.typeInfo().offset(); }
- const ir::OperandInfo &tensorInfo() const override { return _info; }
- uint64_t num_elements() const override { return _info.shape().num_elements(); };
- backend::IDynamicTensorManager *dynamic_tensor_manager() override { return nullptr; }
-
-private:
- const ir::OperandInfo _info;
- std::shared_ptr<const Buffer> _buffer{nullptr};
-};
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_TENSOR_H__
diff --git a/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc b/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc
deleted file mode 100644
index 86e883524..000000000
--- a/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/BinaryArithmeticOps.h>
-
-#include "OperationUtil.h"
-
-#include "interp/Registration.h"
-#include "ir/operation/BinaryArithmetic.h"
-#include "misc/polymorphic_downcast.h"
-#include "cker/Types.h"
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-enum class OpType
-{
- ADD,
- SUB,
- MUL
-};
-
-void prepare(ExecEnv *env, const ir::Operation &node)
-{
- const auto &arithmetic_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::BinaryArithmetic &>(node);
-
- const auto lhs_index = node.getInputs().at(arithmetic_node.LHS);
- const auto rhs_index = node.getInputs().at(arithmetic_node.RHS);
- const auto out_index = node.getOutputs().at(0);
-
- const auto lhs_tensor = env->tensorAt(lhs_index);
- const auto rhs_tensor = env->tensorAt(rhs_index);
-
- // Check shape and type lhs is same with rhs
- // TODO Util function to compare TensorInfo
- if (lhs_tensor->data_type() != rhs_tensor->data_type())
- {
- throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Different input types"};
- }
-
- bool try_broadcast = (lhs_tensor->tensorInfo().shape() != rhs_tensor->tensorInfo().shape());
- if (try_broadcast)
- {
- bool success = true;
- auto out_shape = calcBroadcastShape(lhs_tensor->tensorInfo().shape(),
- rhs_tensor->tensorInfo().shape(), success);
- if (!success)
- {
- throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Fail to brodcasting"};
- }
-
- auto output_info =
- ir::OperandInfo::createStaticInfo(out_shape, lhs_tensor->tensorInfo().typeInfo());
- // We can handle already allocated (ex. model output)
- env->allocateIfNeeded(out_index, output_info);
- }
- else
- {
- // Output's shape and type is same with input
- auto output_info = lhs_tensor->tensorInfo();
- // We can handle already allocated (ex. model output)
- env->allocateIfNeeded(out_index, output_info);
- }
-
- auto out_tensor = env->tensorAt(out_index);
- // Check shape and type lhs is same with output
- // TODO Util function to compare TensorInfo
- if (lhs_tensor->data_type() != out_tensor->data_type())
- {
- throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Invalid output type"};
- }
-}
-
-inline void setActivationParams(float min, float max, nnfw::cker::BinaryArithmeticOpParam *params)
-{
- params->float_activation_min = min;
- params->float_activation_max = max;
-}
-
-inline void setActivationParams(int32_t min, int32_t max,
- nnfw::cker::BinaryArithmeticOpParam *params)
-{
- params->quantized_activation_min = min;
- params->quantized_activation_max = max;
-}
-
-template <typename raw_type, OpType op_type>
-void invoke(const ITensor *lhs_tensor, const ITensor *rhs_tensor, const ITensor *out_tensor,
- const ir::operation::BinaryArithmetic::Param &param)
-{
- const auto lhs_buffer = lhs_tensor->bufferRO();
- const auto rhs_buffer = rhs_tensor->bufferRO();
- auto out_buffer = out_tensor->buffer();
-
- nnfw::cker::BinaryArithmeticOpParam cker_param;
- raw_type activation_min, activation_max;
- calculateActivationRange(param.activation, &activation_min, &activation_max);
- setActivationParams(activation_min, activation_max, &cker_param);
- const raw_type *lhs_ptr = reinterpret_cast<const raw_type *>(lhs_buffer);
- const raw_type *rhs_ptr = reinterpret_cast<const raw_type *>(rhs_buffer);
- raw_type *out_ptr = reinterpret_cast<raw_type *>(out_buffer);
-
- const auto cker_op_type =
- (op_type == OpType::ADD)
- ? nnfw::cker::BinaryArithmeticOpType::ADD
- : ((op_type == OpType::SUB) ? nnfw::cker::BinaryArithmeticOpType::SUB
- : nnfw::cker::BinaryArithmeticOpType::MUL);
-
- const bool need_broadcast = nnfw::cker::ProcessBroadcastShapes(
- convertShape(lhs_tensor->tensorInfo().shape()),
- convertShape(rhs_tensor->tensorInfo().shape()), &cker_param);
-
- if (need_broadcast)
- {
- const auto lhs_shape = convertShape(lhs_tensor->tensorInfo().shape());
- const auto rhs_shape = convertShape(rhs_tensor->tensorInfo().shape());
- const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
- nnfw::cker::BroadcastBinaryArithmeticOp<cker_op_type>(cker_param, lhs_shape, lhs_ptr, rhs_shape,
- rhs_ptr, out_shape, out_ptr);
- return;
- }
-
- const auto lhs_shape = convertShape(lhs_tensor->tensorInfo().shape());
- const auto rhs_shape = convertShape(rhs_tensor->tensorInfo().shape());
- const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
- nnfw::cker::BinaryArithmeticOp<cker_op_type>(cker_param, lhs_shape, lhs_ptr, rhs_shape, rhs_ptr,
- out_shape, out_ptr);
-}
-
-template <OpType op_type>
-void invokeBinaryArithmetic(const ExecEnv *env, const ir::operation::BinaryArithmetic &node)
-{
- const auto lhs_index = node.getInputs().at(node.LHS);
- const auto rhs_index = node.getInputs().at(node.RHS);
- const auto out_index = node.getOutputs().at(0);
- const auto lhs_tensor = env->tensorAt(lhs_index);
- const auto rhs_tensor = env->tensorAt(rhs_index);
- const auto out_tensor = env->tensorAt(out_index);
- const auto data_type = lhs_tensor->data_type();
-
- if (data_type == ir::DataType::INT32)
- {
- invoke<int32_t, op_type>(lhs_tensor, rhs_tensor, out_tensor, node.param());
- }
- else if (data_type == ir::DataType::FLOAT32)
- {
- invoke<float, op_type>(lhs_tensor, rhs_tensor, out_tensor, node.param());
- }
- else
- {
- throw std::runtime_error{"NYI: Unsupported data type"};
- }
-}
-
-void invokeBinaryArithmeticOps(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &arithmetic_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::BinaryArithmetic &>(node);
-
- switch (arithmetic_node.param().arithmetic_type)
- {
- case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
- invokeBinaryArithmetic<OpType::ADD>(env, arithmetic_node);
- break;
- case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
- invokeBinaryArithmetic<OpType::SUB>(env, arithmetic_node);
- break;
- case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
- invokeBinaryArithmetic<OpType::MUL>(env, arithmetic_node);
- break;
- default:
- throw std::runtime_error{"Interp(BinaryArithmetic): NYI unsupported operation " +
- arithmetic_node.name()};
- break;
- }
-}
-
-} // namespace
-
-OpKernel *getBinaryArithmetic()
-{
- static OpKernel kernel = {prepare, invokeBinaryArithmeticOps};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/Concat.cc b/runtime/onert/core/src/interp/operations/Concat.cc
deleted file mode 100644
index efc46c66b..000000000
--- a/runtime/onert/core/src/interp/operations/Concat.cc
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/Concatenation.h>
-
-#include "OperationUtil.h"
-
-#include "interp/Registration.h"
-#include "ir/operation/Concat.h"
-#include "misc/polymorphic_downcast.h"
-
-namespace onert
-{
-namespace interp
-{
-namespace concat
-{
-
-void prepareConcat(ExecEnv *env, const ir::Operation &node)
-{
- const auto &concat_node = nnfw::misc::polymorphic_downcast<const ir::operation::Concat &>(node);
-
- const auto first_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- const auto first_tensor = env->tensorAt(first_index);
- uint32_t out_axis_dimension = 0;
- const int32_t axis_raw = concat_node.param().axis;
- const uint32_t axis = (axis_raw < 0) ? (axis_raw + first_tensor->num_dimensions()) : axis_raw;
-
- // All inputs shape should be same except axis dimension
- // All inputs type should be same
- for (auto input : node.getInputs())
- {
- assert(first_tensor->num_dimensions() == env->tensorAt(input)->num_dimensions());
- assert(first_tensor->data_type() == env->tensorAt(input)->data_type());
- for (uint32_t i = 0; i < first_tensor->num_dimensions(); i++)
- {
- if (i == axis)
- {
- out_axis_dimension += env->tensorAt(input)->dimension(i);
- continue;
- }
- assert(first_tensor->dimension(i) == env->tensorAt(input)->dimension(i));
- }
- }
-
- // Make output tensor info using first input tensor info, and accumulated axis dimension value
- auto out_shape = first_tensor->tensorInfo().shape();
- out_shape.dim(axis) = out_axis_dimension;
- env->allocateIfNeeded(out_index, ir::OperandInfo::createStaticInfo(
- out_shape, first_tensor->tensorInfo().typeInfo()));
-
- auto out_tensor = env->tensorAt(out_index);
- UNUSED_RELEASE(out_tensor);
-
- // Output shape should be same with input except axis dimension
- // Output type should be same with input
- assert(first_tensor->data_type() == out_tensor->data_type());
- for (uint32_t i = 0; i < first_tensor->num_dimensions(); i++)
- {
- if (i == axis)
- {
- continue;
- }
- assert(first_tensor->dimension(i) == out_tensor->dimension(i));
- }
-}
-
-void invoke(const std::vector<const ITensor *> in_tensors, const ITensor *out_tensor, uint32_t axis)
-{
- const uint32_t count = in_tensors.size();
-
- // Calculate
- nnfw::cker::ConcatenationParams cker_param;
- cker_param.axis = (int8_t)axis;
- cker_param.inputs_count = count;
-
- const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
-
- std::vector<nnfw::cker::Shape> in_shapes;
- std::vector<const nnfw::cker::Shape *> in_shape_ptrs;
- in_shapes.reserve(count);
- in_shape_ptrs.reserve(count);
- std::vector<const float *> in_ptrs;
- for (uint32_t i = 0; i < count; i++)
- {
- in_shapes.push_back(convertShape(in_tensors[i]->tensorInfo().shape()));
- in_shape_ptrs.push_back(&in_shapes[i]);
- in_ptrs.push_back(reinterpret_cast<const float *>(in_tensors[i]->bufferRO()));
- }
-
- auto out_buffer = out_tensor->buffer();
- float *out_ptr = reinterpret_cast<float *>(out_buffer);
-
- nnfw::cker::Concatenation<float>(cker_param, in_shape_ptrs.data(), in_ptrs.data(), out_shape,
- out_ptr);
-}
-
-void invokeConcat(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &concat_node = nnfw::misc::polymorphic_downcast<const ir::operation::Concat &>(node);
- const int32_t axis_raw = concat_node.param().axis;
-
- std::vector<const ITensor *> in_tensors;
- for (const auto &e : concat_node.getInputs())
- {
- in_tensors.emplace_back(env->tensorAt(e));
- }
-
- const auto out_index = node.getOutputs().at(0);
- const auto out_tensor = env->tensorAt(out_index);
- const uint32_t axis = (axis_raw < 0) ? (axis_raw + out_tensor->num_dimensions()) : axis_raw;
-
- const auto data_type = in_tensors[0]->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(in_tensors, out_tensor, axis);
- }
- else
- {
- throw std::runtime_error{"NYI: Support float32 only"};
- }
-}
-} // namespace concat
-
-OpKernel *getConcat()
-{
- static OpKernel kernel = {concat::prepareConcat, concat::invokeConcat};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/Conv2D.cc b/runtime/onert/core/src/interp/operations/Conv2D.cc
deleted file mode 100644
index bb00b828c..000000000
--- a/runtime/onert/core/src/interp/operations/Conv2D.cc
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/Conv.h>
-
-#include "OperationUtil.h"
-
-#include "interp/Registration.h"
-#include "ir/operation/Conv2D.h"
-#include "util/Utils.h"
-#include "util/ShapeInference.h"
-#include "misc/polymorphic_downcast.h"
-
-namespace onert
-{
-namespace interp
-{
-namespace conv2d
-{
-
-void prepareConv2D(ExecEnv *env, const ir::Operation &node)
-{
- const auto in_index = node.getInputs().at(ir::operation::Conv2D::INPUT);
- const auto kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL);
- const auto bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
- const auto kernel_tensor = env->tensorAt(kernel_index);
- const auto bias_tensor = env->tensorAt(bias_index);
-
- assert(in_tensor->num_dimensions() == 4);
- assert(kernel_tensor->num_dimensions() == 4);
- assert(bias_tensor->num_dimensions() == 1);
-
- UNUSED_RELEASE(in_tensor);
- UNUSED_RELEASE(kernel_tensor);
- UNUSED_RELEASE(bias_tensor);
-
- const auto output_info = env->graph().operands().at(out_index).info();
- if (output_info.total_size() == 0)
- {
- // Handle unspecified output shape
- const auto &conv_node = nnfw::misc::polymorphic_downcast<const ir::operation::Conv2D &>(node);
- const auto infered_output_shape = shape_inference::inferConv2DShape(
- in_tensor->tensorInfo().shape(), kernel_tensor->tensorInfo().shape(), conv_node.param());
- env->allocateIfNeeded(
- out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo()));
- }
- else
- {
- env->allocateIfNeeded(out_index, output_info);
- }
-
- auto out_tensor = env->tensorAt(out_index);
- UNUSED_RELEASE(out_tensor);
-
- // Handle same ifm & ofm data type only
- assert(in_tensor->data_type() == out_tensor->data_type());
- assert(out_tensor->num_dimensions() == 4);
-}
-
-void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor,
- const ITensor *ofm_tensor, const ir::operation::Conv2D::Param &param)
-{
-  // TODO Support NCHW frontend
- const auto ifm_shape = ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
- const auto &ker_shape = ker_tensor->tensorInfo().shape();
- const auto ker_height = ker_shape.dim(1);
- const auto ker_width = ker_shape.dim(2);
- const auto padding = ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride,
- ker_width, ker_height);
-
- // Calculate
- float activation_min, activation_max;
- calculateActivationRange(param.activation, &activation_min, &activation_max);
-
- nnfw::cker::ConvParams cker_param;
- cker_param.padding_type = convertPaddingType(param.padding.type);
- cker_param.padding_values.width = padding.left;
- cker_param.padding_values.height = padding.top;
- cker_param.stride_width = param.stride.horizontal;
- cker_param.stride_height = param.stride.vertical;
- cker_param.dilation_width_factor = 1;
- cker_param.dilation_height_factor = 1;
- cker_param.float_activation_min = activation_min;
- cker_param.float_activation_max = activation_max;
-
- const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape());
- const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape());
- const auto cker_bias_shape = convertShape(bias_tensor->tensorInfo().shape());
- const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape());
- const float *ifm_ptr = reinterpret_cast<const float *>(ifm_tensor->bufferRO());
- const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO());
- const float *bias_ptr = reinterpret_cast<const float *>(bias_tensor->bufferRO());
- float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer());
-
- nnfw::cker::Conv conv_kernel;
- conv_kernel(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr, cker_bias_shape,
- bias_ptr, cker_ofm_shape, ofm_ptr);
-}
-
-void invokeConv2D(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &conv_node = nnfw::misc::polymorphic_downcast<const ir::operation::Conv2D &>(node);
-
- const auto ifm_index = node.getInputs().at(ir::operation::Conv2D::INPUT);
- const auto ker_index = node.getInputs().at(ir::operation::Conv2D::KERNEL);
- const auto bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS);
- const auto ofm_index = node.getOutputs().at(0);
-
- const auto ifm_tensor = env->tensorAt(ifm_index);
- const auto ker_tensor = env->tensorAt(ker_index);
- const auto bias_tensor = env->tensorAt(bias_index);
- const auto ofm_tensor = env->tensorAt(ofm_index);
-
- const auto data_type = ifm_tensor->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param());
- }
- else
- {
- throw std::runtime_error{"NYI: Support float32 only"};
- }
-}
-} // namespace conv2d
-
-OpKernel *getConv2D()
-{
- static OpKernel kernel = {conv2d::prepareConv2D, conv2d::invokeConv2D};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc b/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc
deleted file mode 100644
index 0473855d9..000000000
--- a/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/DepthwiseConv.h>
-#include <misc/polymorphic_downcast.h>
-
-#include "OperationUtil.h"
-
-#include "interp/Registration.h"
-#include "ir/operation/DepthwiseConv2D.h"
-#include "util/Utils.h"
-#include "util/ShapeInference.h"
-
-namespace onert
-{
-namespace interp
-{
-
-namespace
-{
-
-void prepareDepthwiseConv(ExecEnv *env, const ir::Operation &node)
-{
- const auto in_index = node.getInputs().at(ir::operation::DepthwiseConv2D::INPUT);
- const auto kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL);
- const auto bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
- const auto kernel_tensor = env->tensorAt(kernel_index);
- const auto bias_tensor = env->tensorAt(bias_index);
-
- assert(in_tensor->num_dimensions() == 4);
- assert(kernel_tensor->num_dimensions() == 4);
- assert(bias_tensor->num_dimensions() == 1);
-
- UNUSED_RELEASE(in_tensor);
- UNUSED_RELEASE(kernel_tensor);
- UNUSED_RELEASE(bias_tensor);
-
- // TODO handle unspecified output shape:
- // calculate output shape using ifm shape, kernel shape, padding, stride
- const auto output_info = env->graph().operands().at(out_index).info();
- if (output_info.total_size() == 0)
- {
- // Handle unspecified output shape
- const auto &depth_conv_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::DepthwiseConv2D &>(node);
- const auto infered_output_shape = shape_inference::inferDepthwiseConv2DShape(
- in_tensor->tensorInfo().shape(), kernel_tensor->tensorInfo().shape(),
- depth_conv_node.param());
- env->allocateIfNeeded(
- out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo()));
- }
- else
- {
- env->allocateIfNeeded(out_index, output_info);
- }
-
- auto out_tensor = env->tensorAt(out_index);
- UNUSED_RELEASE(out_tensor);
-
- // Handle same ifm & ofm data type only
- assert(in_tensor->data_type() == out_tensor->data_type());
- assert(out_tensor->num_dimensions() == 4);
-}
-
-void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor,
- const ITensor *ofm_tensor, const ir::operation::DepthwiseConv2D::Param &param)
-{
- // TODO Support NCHW frontend
- const auto ifm_shape = ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- // Kernel format is [1, kernel_height, kernel_width, depth_out].
- const auto &ker_shape = ker_tensor->tensorInfo().shape();
- const auto ker_height = ker_shape.dim(1);
- const auto ker_width = ker_shape.dim(2);
- const auto padding = ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride,
- ker_width, ker_height);
-
- // Calculate
- float activation_min, activation_max;
- calculateActivationRange(param.activation, &activation_min, &activation_max);
-
- nnfw::cker::DepthwiseConvParams cker_param;
- cker_param.padding_values.width = padding.left;
- cker_param.padding_values.height = padding.top;
- cker_param.depth_multiplier = param.multiplier;
- cker_param.stride_width = param.stride.horizontal;
- cker_param.stride_height = param.stride.vertical;
- cker_param.dilation_width_factor = 1;
- cker_param.dilation_height_factor = 1;
- cker_param.float_activation_min = activation_min;
- cker_param.float_activation_max = activation_max;
-
- const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape());
- const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape());
- const auto cker_bias_shape = convertShape(bias_tensor->tensorInfo().shape());
- const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape());
- const float *ifm_ptr = reinterpret_cast<const float *>(ifm_tensor->bufferRO());
- const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO());
- const float *bias_ptr = reinterpret_cast<const float *>(bias_tensor->bufferRO());
- float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer());
-
- nnfw::cker::DepthwiseConv(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr,
- cker_bias_shape, bias_ptr, cker_ofm_shape, ofm_ptr);
-}
-
-void invokeDepthwiseConv(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &conv_node = static_cast<const ir::operation::DepthwiseConv2D &>(node);
-
- const auto ifm_index = node.getInputs().at(ir::operation::DepthwiseConv2D::INPUT);
- const auto ker_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL);
- const auto bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS);
- const auto ofm_index = node.getOutputs().at(0);
-
- const auto ifm_tensor = env->tensorAt(ifm_index);
- const auto ker_tensor = env->tensorAt(ker_index);
- const auto bias_tensor = env->tensorAt(bias_index);
- const auto ofm_tensor = env->tensorAt(ofm_index);
-
- const auto data_type = ifm_tensor->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param());
- }
- else
- {
- throw std::runtime_error{"NYI: Support float32 only"};
- }
-}
-
-} // namespace
-
-OpKernel *getDepthwiseConv2D()
-{
- static OpKernel kernel = {prepareDepthwiseConv, invokeDepthwiseConv};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc b/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc
deleted file mode 100644
index c8773bef4..000000000
--- a/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cmath>
-
-#include "OperationUtil.h"
-
-#include "interp/Registration.h"
-
-#include "ir/operation/ElementwiseActivation.h"
-
-#include <misc/polymorphic_downcast.h>
-#include <cker/operation/Logistic.h>
-#include <cker/operation/Tanh.h>
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-enum class ActivationType
-{
- Logistic,
- ReLU,
- Tanh
-};
-
-void prepare(ExecEnv *env, const ir::Operation &node)
-{
- const auto input_index = node.getInputs().at(0);
- const auto output_index = node.getOutputs().at(0);
-
- const auto input_tensor = env->tensorAt(input_index);
-
- const auto output_info = env->graph().operands().at(output_index).info();
- if (output_info.total_size() == 0)
- {
- // Output's shape and type is same with input
- auto input_info = input_tensor->tensorInfo();
- // We can handle already allocated (ex. model output)
- env->allocateIfNeeded(output_index, input_info);
- }
- else
- {
- env->allocateIfNeeded(output_index, output_info);
- }
-
- const auto output_tensor = env->tensorAt(output_index);
-  // Check that the output data type is the same as the input's
- // TODO Util function to compare TensorInfo
- if (input_tensor->data_type() != output_tensor->data_type())
- {
- throw std::runtime_error{"Interp(ElementwiseActivation): Invalid output type"};
- }
-}
-
-template <ActivationType act_type>
-void evalFloat(const float *input_ptr, float *output_ptr, uint64_t num_elements, float alpha,
- float beta)
-{
- std::function<float(const float &)> fn = [](const float &) { return std::nanf(""); };
- switch (act_type)
- {
- case ActivationType::ReLU:
- fn = [alpha, beta](const float &in) { return std::min(std::max(beta, in), alpha); };
- break;
- case ActivationType::Tanh:
- fn = [](const float &in) { return std::tanh(in); };
- break;
- default:
- throw std::runtime_error{"Interp(ElementwiseActivation): NYI - Unsupported activation"};
- break;
- }
-
- const float *input_end = input_ptr + num_elements;
- for (; input_ptr < input_end; input_ptr++, output_ptr++)
- {
- *output_ptr = fn(*input_ptr);
- }
-}
-
-template <ActivationType act_type> void invoke(const ExecEnv *env, const ir::Operation &node)
-{
- const auto input_index = node.getInputs().at(0);
- const auto output_index = node.getOutputs().at(0);
-
-  // Fetch input and output tensors (same shape and type, checked in prepare)
- const auto input_tensor = env->tensorAt(input_index);
- const auto output_tensor = env->tensorAt(output_index);
-
- const auto data_type = input_tensor->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- uint64_t elements = input_tensor->num_elements();
- const float *input_start = reinterpret_cast<const float *>(input_tensor->bufferRO());
- float *out = reinterpret_cast<float *>(output_tensor->buffer());
- if (act_type == ActivationType::Logistic)
- {
- const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape());
- const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
- nnfw::cker::Logistic(cker_input_shape, input_start, cker_output_shape, out);
- }
- else
- {
- const auto &act_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::ElementwiseActivation &>(node);
- evalFloat<act_type>(input_start, out, elements, act_node.param().alpha,
- act_node.param().beta);
- }
- }
- else
- {
- throw std::runtime_error{"Interp(" + node.name() + "): NYI - Support float only"};
- }
-}
-
-void invokeElementwiseActivation(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &act_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::ElementwiseActivation &>(node);
- switch (act_node.param().op_type)
- {
- case ir::operation::ElementwiseActivation::Type::LOGISTIC:
- invoke<ActivationType::Logistic>(env, node);
- break;
- case ir::operation::ElementwiseActivation::Type::RELU:
- invoke<ActivationType::ReLU>(env, node);
- break;
- case ir::operation::ElementwiseActivation::Type::TANH:
- invoke<ActivationType::Tanh>(env, node);
- break;
- default:
- throw std::runtime_error("Interp(" + node.name() + "): NYI - Unsupported activation");
- }
-}
-
-} // namespace
-
-OpKernel *getElementwiseActivation()
-{
- static OpKernel kernel = {prepare, invokeElementwiseActivation};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/FullyConnected.cc b/runtime/onert/core/src/interp/operations/FullyConnected.cc
deleted file mode 100644
index 12f529dab..000000000
--- a/runtime/onert/core/src/interp/operations/FullyConnected.cc
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/FullyConnected.h>
-
-#include "OperationUtil.h"
-
-#include "interp/Registration.h"
-#include "ir/operation/FullyConnected.h"
-#include "misc/polymorphic_downcast.h"
-
-namespace onert
-{
-namespace interp
-{
-namespace fc
-{
-
-void prepareFC(ExecEnv *env, const ir::Operation &node)
-{
- const auto in_index = node.getInputs().at(ir::operation::FullyConnected::INPUT);
- const auto kernel_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT);
- const auto bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
- const auto kernel_tensor = env->tensorAt(kernel_index);
- const auto bias_tensor = env->tensorAt(bias_index);
-
- UNUSED_RELEASE(in_tensor);
- UNUSED_RELEASE(kernel_tensor);
- UNUSED_RELEASE(bias_tensor);
-
- assert(in_tensor->num_dimensions() >= 2);
- assert(kernel_tensor->num_dimensions() == 2);
- assert(bias_tensor->num_dimensions() == 1);
-
- const auto input_size_with_batch = in_tensor->num_elements();
- const auto num_units = kernel_tensor->dimension(0);
- const auto input_size = kernel_tensor->dimension(1);
- const auto batch_size = input_size_with_batch / input_size;
- assert(input_size_with_batch % input_size == 0);
- assert(num_units == bias_tensor->dimension(0));
-
- // Make output tensor info
- ir::Shape output_shape(2);
- output_shape.dim(0) = batch_size;
- output_shape.dim(1) = num_units;
- const auto out_info =
- ir::OperandInfo::createStaticInfo(output_shape, in_tensor->tensorInfo().typeInfo());
- env->allocateIfNeeded(out_index, out_info);
-
- auto out_tensor = env->tensorAt(out_index);
- UNUSED_RELEASE(out_tensor);
-
- // Handle same ifm & ofm data type only
- assert(in_tensor->data_type() == out_tensor->data_type());
- assert(out_tensor->num_dimensions() == 2);
- assert(out_tensor->dimension(0) == batch_size);
- assert(out_tensor->dimension(1) == num_units);
-}
-
-void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor,
- const ITensor *ofm_tensor, const ir::operation::FullyConnected::Param &param)
-{
- const auto ifm_buffer = ifm_tensor->bufferRO();
- const auto ker_buffer = ker_tensor->bufferRO();
- const auto bias_buffer = bias_tensor->bufferRO();
- auto ofm_buffer = ofm_tensor->buffer();
-
- // Calculate
- nnfw::cker::FullyConnectedParams cker_param;
- cker_param.activation = convertActivationType(param.activation);
- calculateActivationRange(param.activation, &cker_param.float_activation_min,
- &cker_param.float_activation_max);
- const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape());
- const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape());
- const auto cker_bias_shape = convertShape(bias_tensor->tensorInfo().shape());
- const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape());
- const float *ifm_ptr = reinterpret_cast<const float *>(ifm_buffer);
- const float *ker_ptr = reinterpret_cast<const float *>(ker_buffer);
- const float *bias_ptr = reinterpret_cast<const float *>(bias_buffer);
- float *ofm_ptr = reinterpret_cast<float *>(ofm_buffer);
-
- nnfw::cker::FullyConnected(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr,
- cker_bias_shape, bias_ptr, cker_ofm_shape, ofm_ptr);
-}
-
-void invokeFC(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &conv_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::FullyConnected &>(node);
-
- const auto ifm_index = node.getInputs().at(ir::operation::FullyConnected::INPUT);
- const auto ker_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT);
- const auto bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS);
- const auto ofm_index = node.getOutputs().at(0);
-
- const auto ifm_tensor = env->tensorAt(ifm_index);
- const auto ker_tensor = env->tensorAt(ker_index);
- const auto bias_tensor = env->tensorAt(bias_index);
- const auto ofm_tensor = env->tensorAt(ofm_index);
-
- const auto data_type = ifm_tensor->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param());
- }
- else
- {
- throw std::runtime_error{"NYI: Support float only"};
- }
-}
-} // namespace fc
-
-OpKernel *getFullyConnected()
-{
- static OpKernel kernel = {fc::prepareFC, fc::invokeFC};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
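
Note: a quick worked example of the output shape computed by the removed
prepareFC(): for an input of shape [2, 12] (24 elements), a weight of shape
[10, 12] and a bias of shape [10], input_size is 12 (weight dim 1), num_units
is 10 (weight dim 0), batch_size is 24 / 12 = 2, so the output is allocated as
[2, 10]. An input with more than two dimensions, e.g. [2, 3, 4], is handled the
same way through its element count (again 24 / 12 = 2).
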
diff --git a/runtime/onert/core/src/interp/operations/Gather.cc b/runtime/onert/core/src/interp/operations/Gather.cc
deleted file mode 100644
index 9e82def5f..000000000
--- a/runtime/onert/core/src/interp/operations/Gather.cc
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/Gather.h>
-
-#include "OperationUtil.h"
-
-#include "interp/Registration.h"
-#include "ir/operation/Gather.h"
-#include "misc/polymorphic_downcast.h"
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-void prepareGather(ExecEnv *env, const ir::Operation &node)
-{
- const auto input_index = node.getInputs().at(ir::operation::Gather::INPUT);
- const auto indices_index = node.getInputs().at(ir::operation::Gather::INDICES);
- const auto output_index = node.getOutputs().at(0);
-
- const auto input_tensor = env->tensorAt(input_index);
- const auto indices_tensor = env->tensorAt(indices_index);
-
- // TODO handle unspecified output shape:
-  // calculate output shape using input shape, indices shape, and axis
- const auto output_info = env->graph().operands().at(output_index).info();
- if (output_info.total_size() == 0)
- {
- throw std::runtime_error{"Interp(Gather): NYI for unspecified output shape"};
- }
- else
- {
- env->allocateIfNeeded(output_index, output_info);
- }
-
- if (indices_tensor->data_type() != ir::DataType::INT32)
- {
- throw std::runtime_error{"Interp(Gather): Invalid indices data type"};
- }
-
- auto output_tensor = env->tensorAt(output_index);
- auto output_rank = input_tensor->num_dimensions() + indices_tensor->num_dimensions() - 1;
-
- if (output_rank != output_tensor->num_dimensions())
- {
- throw std::runtime_error{"Interp(Gather): Invalid output rank"};
- }
- if (output_tensor->data_type() != input_tensor->data_type())
- {
- throw std::runtime_error{"Interp(Gather): Invalid output data type"};
- }
-
- if (input_tensor->data_type() == ir::DataType::QUANT_UINT8_ASYMM &&
- input_tensor->tensorInfo().typeInfo() != output_tensor->tensorInfo().typeInfo())
- {
- throw std::runtime_error{
- "Interp(Gather): Cannot handle different I/O QUANT_UINT8_ASYMM scale/offset"};
- }
-}
-
-template <typename raw_type>
-void invoke(const ITensor *input_tensors, const ITensor *indices_tensors,
- const ITensor *output_tensor, uint32_t axis)
-{
- // Calculate
- nnfw::cker::GatherParams cker_param;
- cker_param.axis = (int8_t)axis;
-
- const auto cker_input_shapes = convertShape(input_tensors->tensorInfo().shape());
- const auto cker_indices_shape = convertShape(indices_tensors->tensorInfo().shape());
- const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
- const raw_type *input_ptr = reinterpret_cast<const raw_type *>(input_tensors->bufferRO());
- const int32_t *indices_ptr = reinterpret_cast<const int32_t *>(indices_tensors->bufferRO());
- raw_type *output_ptr = reinterpret_cast<raw_type *>(output_tensor->buffer());
-
- nnfw::cker::Gather<raw_type>(cker_param, cker_input_shapes, input_ptr, cker_indices_shape,
- indices_ptr, cker_output_shape, output_ptr);
-}
-
-void invokeGather(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &gather_node = nnfw::misc::polymorphic_downcast<const ir::operation::Gather &>(node);
- const int32_t axis_raw = gather_node.param().axis;
-
- const auto input_index = node.getInputs().at(ir::operation::Gather::INPUT);
- const auto indices_index = node.getInputs().at(ir::operation::Gather::INDICES);
- const auto output_index = node.getOutputs().at(0);
-
- const auto input_tensor = env->tensorAt(input_index);
- const auto indices_tensor = env->tensorAt(indices_index);
- const auto output_tensor = env->tensorAt(output_index);
- const uint32_t axis = (axis_raw < 0) ? (axis_raw + input_tensor->num_dimensions()) : axis_raw;
-
- const auto data_type = input_tensor->data_type();
-
- switch (data_type)
- {
- case ir::DataType::FLOAT32:
- invoke<float>(input_tensor, indices_tensor, output_tensor, axis);
- break;
- case ir::DataType::INT32:
- invoke<int32_t>(input_tensor, indices_tensor, output_tensor, axis);
- break;
- case ir::DataType::QUANT_UINT8_ASYMM:
- invoke<uint8_t>(input_tensor, indices_tensor, output_tensor, axis);
- break;
- default:
- throw std::runtime_error{"Interp(Gather): NYI - Not supported type"};
- }
-}
-
-} // namespace
-
-OpKernel *getGather()
-{
- static OpKernel kernel = {prepareGather, invokeGather};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
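
Note: the removed Concat and Gather kernels normalize a negative axis by adding
the tensor rank, so the axis counts from the end. A minimal sketch of the same
computation, with a bounds check added for illustration (the removed code
relies on the caller passing a valid axis):

  #include <cstdint>
  #include <stdexcept>

  // Normalize a possibly negative axis against a tensor of the given rank,
  // as in invokeConcat()/invokeGather(); e.g. normalizeAxis(-1, 4) == 3.
  inline uint32_t normalizeAxis(int32_t axis_raw, uint32_t rank)
  {
    const int32_t axis =
      (axis_raw < 0) ? axis_raw + static_cast<int32_t>(rank) : axis_raw;
    if (axis < 0 || axis >= static_cast<int32_t>(rank))
      throw std::runtime_error{"normalizeAxis: axis out of range"};
    return static_cast<uint32_t>(axis);
  }
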
diff --git a/runtime/onert/core/src/interp/operations/InstanceNorm.cc b/runtime/onert/core/src/interp/operations/InstanceNorm.cc
deleted file mode 100644
index 2538bcc39..000000000
--- a/runtime/onert/core/src/interp/operations/InstanceNorm.cc
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/InstanceNorm.h>
-
-#include "OperationUtil.h"
-
-#include "interp/Registration.h"
-#include "ir/operation/InstanceNorm.h"
-#include "misc/polymorphic_downcast.h"
-
-namespace onert
-{
-namespace interp
-{
-namespace instancenorm
-{
-
-void prepareInstanceNorm(ExecEnv *env, const ir::Operation &node)
-{
- const auto &instancenorm_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::InstanceNorm &>(node);
-
- const auto input_index = node.getInputs().at(instancenorm_node.INPUT);
- const auto output_index = node.getOutputs().at(0);
- const auto input_tensor = env->tensorAt(input_index);
-
- if (input_tensor->num_dimensions() != 4)
- {
- throw std::runtime_error{"Interp(InstanceNorm): Input should be 4D-tensor"};
- }
-
- // Output shape should be same with input
- env->allocateIfNeeded(output_index, input_tensor->tensorInfo());
-
- auto output_tensor = env->tensorAt(output_index);
- UNUSED_RELEASE(output_tensor);
-
- // Handle same ifm & ofm data type only
- assert(input_tensor->data_type() == output_tensor->data_type());
- assert(input_tensor->tensorInfo().shape() == output_tensor->tensorInfo().shape());
-}
-
-inline void setActivationParams(float min, float max, nnfw::cker::InstanceNormParams *params)
-{
- params->float_activation_min = min;
- params->float_activation_max = max;
-}
-
-void invoke(const ITensor *input_tensor, const ITensor *gamma_tensor, const ITensor *beta_tensor,
- const ITensor *output_tensor, const ir::operation::InstanceNorm::Param &param)
-{
- // Calculate
- float activation_min, activation_max;
- calculateActivationRange(param.activation, &activation_min, &activation_max);
-
- nnfw::cker::InstanceNormParams cker_param;
- cker_param.epsilon = param.epsilon;
- cker_param.float_activation_min = activation_min;
- cker_param.float_activation_max = activation_max;
-
- const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape());
- const auto cker_gamma_shape = convertShape(gamma_tensor->tensorInfo().shape());
- const auto cker_beta_shape = convertShape(beta_tensor->tensorInfo().shape());
- const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
- const float *input_ptr = reinterpret_cast<const float *>(input_tensor->bufferRO());
- const float *gamma_ptr = reinterpret_cast<const float *>(gamma_tensor->bufferRO());
- const float *beta_ptr = reinterpret_cast<const float *>(beta_tensor->bufferRO());
- float *output_ptr = reinterpret_cast<float *>(output_tensor->buffer());
-
- nnfw::cker::InstanceNorm(cker_param, cker_input_shape, input_ptr, cker_gamma_shape, gamma_ptr,
- cker_beta_shape, beta_ptr, cker_output_shape, output_ptr);
-}
-
-void invokeInstanceNorm(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &instancenorm_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::InstanceNorm &>(node);
-
- const auto input_index = node.getInputs().at(instancenorm_node.INPUT);
- const auto gamma_index = node.getInputs().at(instancenorm_node.GAMMA);
- const auto beta_index = node.getInputs().at(instancenorm_node.BETA);
- const auto out_index = node.getOutputs().at(0);
- const auto input_tensor = env->tensorAt(input_index);
- const auto gamma_tensor = env->tensorAt(gamma_index);
- const auto beta_tensor = env->tensorAt(beta_index);
- const auto out_tensor = env->tensorAt(out_index);
- const auto data_type = input_tensor->data_type();
-
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(input_tensor, gamma_tensor, beta_tensor, out_tensor, instancenorm_node.param());
- }
- else
- {
- throw std::runtime_error{"NYI: Unsupported data type"};
- }
-}
-} // namespace instancenorm
-
-OpKernel *getInstanceNorm()
-{
- static OpKernel kernel = {instancenorm::prepareInstanceNorm, instancenorm::invokeInstanceNorm};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/OperationUtil.h b/runtime/onert/core/src/interp/operations/OperationUtil.h
deleted file mode 100644
index 2fdf098f0..000000000
--- a/runtime/onert/core/src/interp/operations/OperationUtil.h
+++ /dev/null
@@ -1,203 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_INTERP_OPERATIONS_OPERATION_UTILS_H_
-#define __ONERT_INTERP_OPERATIONS_OPERATION_UTILS_H_
-
-#include "ir/Shape.h"
-#include "ir/InternalType.h"
-#include "ir/Padding.h"
-
-#include <cker/Shape.h>
-#include <cker/Types.h>
-
-namespace onert
-{
-namespace interp
-{
-
-inline nnfw::cker::Shape convertShape(const ir::Shape &shape)
-{
- auto dimensions = std::vector<uint32_t>(shape.dims().begin(), shape.dims().end());
-
- std::vector<int32_t> raw_shape;
- raw_shape.resize(dimensions.size());
-
- for (uint32_t i = 0; i < dimensions.size(); ++i)
- {
- raw_shape[i] = dimensions[i];
- }
-
- return nnfw::cker::GetShape(raw_shape);
-}
-
-inline nnfw::cker::Shape convertExtendShape(const ir::Shape &shape)
-{
- auto dimensions = std::vector<uint32_t>(shape.dims().begin(), shape.dims().end());
-
- const int32_t extended_rank = 4;
- int32_t raw_shape[extended_rank];
- uint32_t start = extended_rank - dimensions.size();
-
- for (uint32_t i = 0; i < extended_rank; ++i)
- {
- if (i < start)
- {
- raw_shape[i] = 1;
- }
- else
- {
- raw_shape[i] = dimensions[i - start];
- }
- }
-
- return nnfw::cker::Shape(extended_rank, raw_shape);
-}
-
-inline nnfw::cker::FusedActivationFunctionType
-convertActivationType(const ir::Activation activation)
-{
- switch (activation)
- {
- case ir::Activation::NONE:
- return nnfw::cker::FusedActivationFunctionType::kNone;
- case ir::Activation::RELU:
- return nnfw::cker::FusedActivationFunctionType::kRelu;
- case ir::Activation::RELU1:
- return nnfw::cker::FusedActivationFunctionType::kRelu1;
- case ir::Activation::RELU6:
- return nnfw::cker::FusedActivationFunctionType::kRelu6;
- default:
- throw std::runtime_error{"CPU backend: Cannot convert activation type"};
- }
-}
-
-template <typename T>
-void calculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max)
-{
- if (activation == ir::Activation::RELU)
- {
- *activation_min = 0;
- *activation_max = std::numeric_limits<T>::max();
- }
- else if (activation == ir::Activation::RELU6)
- {
- *activation_min = 0;
- *activation_max = 6;
- }
- else if (activation == ir::Activation::RELU1)
- {
- *activation_min = -1;
- *activation_max = 1;
- }
- else if (activation == ir::Activation::NONE)
- {
- *activation_min = std::numeric_limits<T>::lowest();
- *activation_max = std::numeric_limits<T>::max();
- }
- else
- {
- throw std::runtime_error{"Unsupported activation type"};
- }
-}
-
-inline ir::Shape calcBroadcastShape(const ir::Shape &lhs, const ir::Shape &rhs, bool &success)
-{
- int lhs_rank = lhs.rank();
- int rhs_rank = rhs.rank();
-
- int out_rank = (lhs_rank > rhs_rank ? lhs_rank : rhs_rank);
- ir::Shape out_shape(out_rank);
-
- int lhs_idim = lhs_rank - 1;
- int rhs_idim = rhs_rank - 1;
- success = true;
- for (int out_idim = out_rank - 1; out_idim >= 0; out_idim--)
- {
- if (lhs_idim == -1 && rhs_idim == -1)
- {
- // invalid result
- success = false;
- break;
- }
-
- if (lhs_idim == -1)
- {
- out_shape.dim(out_idim) = rhs.dim(rhs_idim);
- rhs_idim--;
- }
- else if (rhs_idim == -1)
- {
- out_shape.dim(out_idim) = lhs.dim(lhs_idim);
- lhs_idim--;
- }
- else
- {
- if (lhs.dim(lhs_idim) == rhs.dim(rhs_idim))
- {
- out_shape.dim(out_idim) = lhs.dim(lhs_idim);
- lhs_idim--;
- rhs_idim--;
- }
- else if (lhs.dim(lhs_idim) == 1)
- {
- out_shape.dim(out_idim) = rhs.dim(rhs_idim);
- lhs_idim--;
- rhs_idim--;
- }
- else if (rhs.dim(rhs_idim) == 1)
- {
- out_shape.dim(out_idim) = lhs.dim(lhs_idim);
- lhs_idim--;
- rhs_idim--;
- }
- else
- {
- // invalid result
- success = false;
- break;
- }
- }
- }
-
- if (lhs_idim != -1 || rhs_idim != -1)
- {
- // invalid result
- success = false;
- }
- return out_shape;
-}
-
-inline nnfw::cker::PaddingType convertPaddingType(ir::PaddingType ir_padding_type)
-{
- switch (ir_padding_type)
- {
- case ir::PaddingType::EXPLICIT:
- return nnfw::cker::PaddingType::kNone;
- case ir::PaddingType::SAME:
- return nnfw::cker::PaddingType::kSame;
- case ir::PaddingType::VALID:
- return nnfw::cker::PaddingType::kValid;
- default:
- throw std::runtime_error("Wrong padding type.");
- break;
- }
-}
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_OPERATIONS_OPERATION_UTILS_H_
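
Note: two worked examples of the broadcasting rule implemented by the removed
calcBroadcastShape(): shapes are aligned from the trailing dimension, a size-1
dimension broadcasts against any size, and missing leading dimensions are taken
from the other operand.

  lhs = {3, 1, 5}, rhs = {2, 5}  ->  out = {3, 2, 5}, success = true
  lhs = {3, 4, 5}, rhs = {2, 5}  ->  success = false   (4 vs 2, neither is 1)
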
diff --git a/runtime/onert/core/src/interp/operations/Pad.cc b/runtime/onert/core/src/interp/operations/Pad.cc
deleted file mode 100644
index c8dce698d..000000000
--- a/runtime/onert/core/src/interp/operations/Pad.cc
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/Pad.h>
-
-#include "OperationUtil.h"
-
-#include "interp/Registration.h"
-#include "ir/operation/Pad.h"
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-void preparePad(ExecEnv *env, const ir::Operation &node)
-{
- const auto input_index = node.getInputs().at(ir::operation::Pad::INPUT);
- const auto output_index = node.getOutputs().at(0);
-
- const auto input_tensor = env->tensorAt(input_index);
-
- const auto output_info = env->graph().operands().at(output_index).info();
-
-  // Check that the output shape is specified; the type is checked against the input below
- // TODO Util function to compare TensorInfo
- if (output_info.total_size() == 0)
- {
- throw std::runtime_error{"Interp(Pad): NYI unspecified output shape"};
- }
- else
- {
- env->allocateIfNeeded(output_index, output_info);
- }
-
- const auto output_tensor = env->tensorAt(output_index);
- if (input_tensor->data_type() != output_tensor->data_type())
- {
- throw std::runtime_error{"Interp(Pad): Invalid output type"};
- }
-}
-
-void invoke(const ITensor *input_tensor, const ITensor *pad_tensor, const ITensor *output_tensor)
-{
- const auto input_buffer = input_tensor->bufferRO();
- const auto pad_buffer = pad_tensor->bufferRO();
- auto output_buffer = output_tensor->buffer();
-
- int32_t pad_rank = pad_tensor->dimension(0);
-
- const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape());
- const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
- const float *input_ptr = reinterpret_cast<const float *>(input_buffer);
- const int32_t *pad_ptr = reinterpret_cast<const int32_t *>(pad_buffer);
- float *output_ptr = reinterpret_cast<float *>(output_buffer);
-
- nnfw::cker::Pad<float>(pad_ptr, pad_rank, cker_input_shape, input_ptr, cker_output_shape,
- output_ptr, nullptr);
-}
-
-void invokePad(const ExecEnv *env, const ir::Operation &node)
-{
- const auto input_index = node.getInputs().at(ir::operation::Pad::INPUT);
- const auto pad_index = node.getInputs().at(ir::operation::Pad::PAD);
- const auto output_index = node.getOutputs().at(0);
-
- const auto input_tensor = env->tensorAt(input_index);
- const auto pad_tensor = env->tensorAt(pad_index);
- const auto output_tensor = env->tensorAt(output_index);
-
- const auto data_type = input_tensor->data_type();
-
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(input_tensor, pad_tensor, output_tensor);
- }
- else
- {
- throw std::runtime_error{"Interp(Pad): NYI - Unsupported data type"};
- }
-}
-} // namespace
-
-OpKernel *getPad()
-{
- static OpKernel kernel = {preparePad, invokePad};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/Pool2D.cc b/runtime/onert/core/src/interp/operations/Pool2D.cc
deleted file mode 100644
index 92f9d70b2..000000000
--- a/runtime/onert/core/src/interp/operations/Pool2D.cc
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/AveragePool.h>
-#include <cker/operation/MaxPool.h>
-
-#include "OperationUtil.h"
-
-#include "interp/Registration.h"
-#include "ir/operation/Pool2D.h"
-#include "util/Utils.h"
-#include "util/ShapeInference.h"
-#include "misc/polymorphic_downcast.h"
-
-namespace onert
-{
-namespace interp
-{
-namespace pool2d
-{
-
-void preparePool2D(ExecEnv *env, const ir::Operation &node)
-{
- const auto &pool_node = nnfw::misc::polymorphic_downcast<const ir::operation::Pool2D &>(node);
- const auto in_index = node.getInputs().at(pool_node.INPUT);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
- UNUSED_RELEASE(in_tensor);
-
- assert(in_tensor->num_dimensions() == 4);
-
- const auto output_info = env->graph().operands().at(out_index).info();
- if (output_info.total_size() == 0)
- {
- // Handle unspecified output shape
- const auto infered_output_shape =
- shape_inference::inferPoolShape(in_tensor->tensorInfo().shape(), pool_node.param());
- env->allocateIfNeeded(
- out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo()));
- }
- else
- {
- env->allocateIfNeeded(out_index, output_info);
- }
-
- auto out_tensor = env->tensorAt(out_index);
- UNUSED_RELEASE(out_tensor);
-
- // Handle same ifm & ofm data type only
- assert(in_tensor->data_type() == out_tensor->data_type());
- assert(out_tensor->num_dimensions() == 4);
-}
-
-template <typename T>
-void invoke(const nnfw::cker::PoolParams &params, const nnfw::cker::Shape &in_shape,
- const T *in_ptr, const nnfw::cker::Shape &out_shape, T *out_ptr,
- ir::operation::Pool2D::PoolType op_type)
-{
- switch (op_type)
- {
- case ir::operation::Pool2D::PoolType::AVG:
- nnfw::cker::AveragePool<T>(params, in_shape, in_ptr, out_shape, out_ptr);
- break;
- case ir::operation::Pool2D::PoolType::MAX:
- nnfw::cker::MaxPool<T>(params, in_shape, in_ptr, out_shape, out_ptr);
- break;
- default:
- throw std::runtime_error{"Interp(Pool2D): NYI unsupported operation"};
- break;
- }
-}
-
-void invokePool2DOps(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &pool_node = nnfw::misc::polymorphic_downcast<const ir::operation::Pool2D &>(node);
-
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
-  // Fetch input and output tensors (shapes validated in preparePool2D)
- const auto in_tensor = env->tensorAt(in_index);
- const auto out_tensor = env->tensorAt(out_index);
-
- // TODO support NCHW frontend
- const auto ifm_shape = in_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- const auto ofm_shape = out_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- const auto param = pool_node.param();
- const auto padding =
- ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, param.kw, param.kh);
- // Calculate
- nnfw::cker::PoolParams cker_param;
- cker_param.filter_width = param.kw;
- cker_param.filter_height = param.kh;
- cker_param.padding_values.width = padding.left;
- cker_param.padding_values.height = padding.top;
- cker_param.stride_width = param.stride.horizontal;
- cker_param.stride_height = param.stride.vertical;
-
- const auto data_type = in_tensor->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- calculateActivationRange(param.activation, &cker_param.float_activation_min,
- &cker_param.float_activation_max);
-
- const auto in_shape = convertShape(in_tensor->tensorInfo().shape());
- const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
- const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO());
- float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer());
- // Now, invoke() supports only Pool2D in float
- invoke<float>(cker_param, in_shape, in_ptr, out_shape, out_ptr, param.op_type);
- }
- else
- {
- throw std::runtime_error{"NYI: Support float only"};
- }
-}
-} // namespace pool2d
-
-OpKernel *getPool2D()
-{
- static OpKernel kernel = {pool2d::preparePool2D, pool2d::invokePool2DOps};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/Reshape.cc b/runtime/onert/core/src/interp/operations/Reshape.cc
deleted file mode 100644
index 3a118456b..000000000
--- a/runtime/onert/core/src/interp/operations/Reshape.cc
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "interp/Registration.h"
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-void prepare(ExecEnv *env, const ir::Operation &node)
-{
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- // Unspecified shape is not supported in operation node spec now
- const auto output_info = env->graph().operands().at(out_index).info();
- env->allocateAndShareIfNeeded(out_index, output_info, in_index);
-
- assert(output_info.total_size() == env->graph().operands().at(in_index).info().total_size());
-}
-
-void invoke(const ExecEnv *env, const ir::Operation &node)
-{
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- if (env->tensorAt(in_index)->bufferRO() == env->tensorAt(out_index)->bufferRO())
- {
- // Same data
- return;
- }
-
- const auto output_info = env->graph().operands().at(out_index).info();
- memcpy(env->tensorAt(out_index)->buffer(), env->tensorAt(in_index)->bufferRO(),
- output_info.total_size());
-}
-
-} // namespace
-
-OpKernel *getReshape()
-{
- static OpKernel kernel = {prepare, invoke};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/Softmax.cc b/runtime/onert/core/src/interp/operations/Softmax.cc
deleted file mode 100644
index d30f78deb..000000000
--- a/runtime/onert/core/src/interp/operations/Softmax.cc
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/SoftMax.h>
-
-#include "OperationUtil.h"
-
-#include "interp/Registration.h"
-#include "ir/operation/Softmax.h"
-#include "misc/polymorphic_downcast.h"
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-void prepareSoftMax(ExecEnv *env, const ir::Operation &node)
-{
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
- UNUSED_RELEASE(in_tensor);
-
- assert((in_tensor->num_dimensions() == 4) || (in_tensor->num_dimensions() == 2));
-
- // Output shape should be same with input
- // Output type is pre-defined in model
- const auto output_shape = env->graph().operands().at(in_index).info().shape();
- const auto output_type = env->graph().operands().at(out_index).info().typeInfo();
-
- const auto output_info = ir::OperandInfo::createStaticInfo(output_shape, output_type);
- env->allocateIfNeeded(out_index, output_info);
-
- auto out_tensor = env->tensorAt(out_index);
- UNUSED_RELEASE(out_tensor);
-
- // Check output shape is same with input
-  assert(in_tensor->num_dimensions() == out_tensor->num_dimensions());
- for (uint32_t i = 0; i < in_tensor->num_dimensions(); i++)
- {
- assert(in_tensor->dimension(i) == out_tensor->dimension(i));
- }
-}
-
-void invoke(const ITensor *in_tensor, const ITensor *out_tensor,
- const ir::operation::Softmax::Param &param)
-{
- const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO());
- float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer());
-
- float beta = param.beta;
-
- if (in_tensor->num_dimensions() == 2)
- {
- uint32_t batch_size = in_tensor->dimension(0);
- uint32_t input_size = in_tensor->dimension(1);
-
- nnfw::cker::Softmax(in_ptr, input_size, batch_size, beta, out_ptr);
- }
- else if (in_tensor->num_dimensions() == 4)
- {
- const auto in_shape = convertShape(in_tensor->tensorInfo().shape());
- const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
-
- nnfw::cker::SoftmaxParams cker_param;
- cker_param.beta = beta;
-
- nnfw::cker::Softmax(cker_param, in_shape, in_ptr, out_shape, out_ptr);
- }
- else
- {
- throw std::runtime_error{"Unsuported input dimension: support 2D or 4D"};
- }
-}
-
-void invokeSoftMax(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &softmax_node = nnfw::misc::polymorphic_downcast<const ir::operation::Softmax &>(node);
-
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
- const auto out_tensor = env->tensorAt(out_index);
-
- const auto in_data_type = in_tensor->data_type();
- const auto out_data_type = out_tensor->data_type();
- if ((in_data_type == ir::DataType::FLOAT32) && (out_data_type == ir::DataType::FLOAT32))
- {
- invoke(in_tensor, out_tensor, softmax_node.param());
- }
- else
- {
- throw std::runtime_error{"NYI: Support float32 only"};
- }
-}
-
-} // namespace
-
-OpKernel *getSoftmax()
-{
- static OpKernel kernel = {prepareSoftMax, invokeSoftMax};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/TransposeConv.cc b/runtime/onert/core/src/interp/operations/TransposeConv.cc
deleted file mode 100644
index cc2ced26b..000000000
--- a/runtime/onert/core/src/interp/operations/TransposeConv.cc
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/TransposeConv.h>
-#include <misc/polymorphic_downcast.h>
-
-#include "OperationUtil.h"
-
-#include "interp/Registration.h"
-#include "ir/operation/TransposeConv.h"
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-void prepareTransposeConv(ExecEnv *env, const ir::Operation &node)
-{
- const auto ifm_index = node.getInputs().at(ir::operation::TransposeConv::INPUT);
- const auto ker_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL);
- const auto ofm_shape_index = node.getInputs().at(ir::operation::TransposeConv::OUTPUT_SHAPE);
- const auto ofm_index = node.getOutputs().at(0);
-
- const auto ifm_tensor = env->tensorAt(ifm_index);
- const auto ker_tensor = env->tensorAt(ker_index);
- const auto ofm_shape_tensor = env->tensorAt(ofm_shape_index);
-
- assert(ifm_tensor->num_dimensions() == 4);
- assert(ker_tensor->num_dimensions() == 4);
- assert(ofm_shape_tensor->num_dimensions() == 1);
-
- UNUSED_RELEASE(ifm_tensor);
- UNUSED_RELEASE(ker_tensor);
- UNUSED_RELEASE(ofm_shape_tensor);
-
- const auto output_info = env->graph().operands().at(ofm_index).info();
- if (output_info.total_size() == 0)
- {
- // TODO: Handle unspecified output shape
- throw std::runtime_error{"Interp(TConv): NYI unspecified output shape"};
- }
- else
- {
- env->allocateIfNeeded(ofm_index, output_info);
- }
-
- auto ofm_tensor = env->tensorAt(ofm_index);
- UNUSED_RELEASE(ofm_tensor);
-
- // Handle same ifm & ofm data type only
- if (ifm_tensor->data_type() != ofm_tensor->data_type())
- {
- throw std::runtime_error{"Interp(TConv): Different I/O data dype"};
- }
-
- if (ofm_tensor->num_dimensions() != 4)
- {
- throw std::runtime_error{"Interp(TConv): Invalid output rank"};
- }
-}
-
-void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *ofm_tensor,
- const ir::operation::TransposeConv::Param &param)
-{
- const auto ifm_shape = ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
- const auto ker_shape = ker_tensor->tensorInfo().shape();
- const auto ker_height = ker_shape.dim(1);
- const auto ker_width = ker_shape.dim(2);
- const auto padding = ir::calculatePadding(param.padding, ofm_shape, ifm_shape, param.stride,
- ker_width, ker_height);
-
- nnfw::cker::TransposeConvParams cker_param;
- cker_param.padding_values.width = padding.left;
- cker_param.padding_values.height = padding.top;
- cker_param.stride_width = param.stride.horizontal;
- cker_param.stride_height = param.stride.vertical;
- cker_param.dilation_width_factor = 1;
- cker_param.dilation_height_factor = 1;
-
- const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape());
- const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape());
- const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape());
- const float *ifm_ptr = reinterpret_cast<const float *>(ifm_tensor->bufferRO());
- const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO());
- float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer());
-
- nnfw::cker::TransposeConv(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr,
- cker_ofm_shape, ofm_ptr);
-}
-
-void invokeTransposeConv(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &tconv_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::TransposeConv &>(node);
-
- const auto ifm_index = node.getInputs().at(ir::operation::TransposeConv::INPUT);
- const auto ker_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL);
- const auto ofm_index = node.getOutputs().at(0);
-
- const auto ifm_tensor = env->tensorAt(ifm_index);
- const auto ker_tensor = env->tensorAt(ker_index);
- const auto ofm_tensor = env->tensorAt(ofm_index);
-
- const auto data_type = ifm_tensor->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(ifm_tensor, ker_tensor, ofm_tensor, tconv_node.param());
- }
- else
- {
- throw std::runtime_error{"Interp(TConv): Support float32 only"};
- }
-}
-
-} // namespace
-
-OpKernel *getTransposeConv()
-{
- static OpKernel kernel = {prepareTransposeConv, invokeTransposeConv};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/DataType.cc b/runtime/onert/core/src/ir/DataType.cc
index 80c659b3a..07670c720 100644
--- a/runtime/onert/core/src/ir/DataType.cc
+++ b/runtime/onert/core/src/ir/DataType.cc
@@ -41,11 +41,17 @@ size_t sizeOfDataType(DataType data_type)
case DataType::UINT8:
return sizeof(uint8_t);
case DataType::QUANT_INT8_SYMM:
+ case DataType::QUANT_INT8_ASYMM:
+ case DataType::QUANT_INT8_SYMM_PER_CHANNEL:
return sizeof(int8_t);
case DataType::FLOAT16:
return sizeof(float16);
case DataType::INT64:
return sizeof(int64_t);
+ case DataType::QUANT_INT16_ASYMM:
+ return sizeof(int16_t);
+ case DataType::QUANT_INT16_SYMM:
+ return sizeof(int16_t);
default:
throw std::runtime_error{"Unsupported type size"};
}
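
The hunk above extends the element-size mapping to the newly supported quantized types. As a point of reference, here is a minimal standalone sketch of how such a mapping is typically consumed when sizing a tensor buffer; the enum subset and the totalBytes() helper are assumptions for illustration only, not onert code.

#include <cstddef>
#include <cstdint>
#include <numeric>
#include <stdexcept>
#include <vector>

enum class DataType { UINT8, QUANT_INT8_ASYMM, QUANT_INT16_SYMM, FLOAT32 };

std::size_t sizeOfDataType(DataType t)
{
  switch (t)
  {
    case DataType::UINT8:
      return sizeof(uint8_t);
    case DataType::QUANT_INT8_ASYMM:
      return sizeof(int8_t);
    case DataType::QUANT_INT16_SYMM:
      return sizeof(int16_t);
    case DataType::FLOAT32:
      return sizeof(float);
    default:
      throw std::runtime_error{"Unsupported type size"};
  }
}

// A tensor's buffer size is the product of its dimensions times the element size.
std::size_t totalBytes(const std::vector<int> &shape, DataType t)
{
  std::size_t elems =
    std::accumulate(shape.begin(), shape.end(), std::size_t{1},
                    [](std::size_t acc, int d) { return acc * static_cast<std::size_t>(d); });
  return elems * sizeOfDataType(t);
}
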
diff --git a/runtime/onert/core/src/ir/Graph.cc b/runtime/onert/core/src/ir/Graph.cc
index fe8b1b443..306572c99 100644
--- a/runtime/onert/core/src/ir/Graph.cc
+++ b/runtime/onert/core/src/ir/Graph.cc
@@ -16,18 +16,10 @@
#include "ir/Graph.h"
-#include <algorithm>
-#include <bitset>
-#include <sstream>
-
-#include "util/logging.h"
+#include "OperationValidator.h"
#include "verifier/Verifier.h"
-#include "ir/operation/LowerInfo.h"
-#include "ir/operand/LowerInfo.h"
-#include "ir/operand/PermuteFactor.h"
-#include "ir/OperandIndexMap.h"
-#include "ir/GraphIterator.h"
-#include "backend/IConfig.h"
+
+#include "util/Set.h"
namespace onert
{
@@ -36,6 +28,8 @@ namespace ir
Graph::Graph() = default;
+Graph::Graph(const Graph &) = default;
+
Graph::~Graph(void) = default;
OperandIndex Graph::addOperand(const Shape &shape, const TypeInfo &type)
@@ -43,22 +37,91 @@ OperandIndex Graph::addOperand(const Shape &shape, const TypeInfo &type)
return _operands.emplace(shape, type);
}
-OperationIndex Graph::addOperation(std::unique_ptr<Operation> &&node)
+OperandIndex Graph::addOperand(OperandIndex index, std::unique_ptr<Operand> &&operand)
+{
+ return _operands.push(std::move(operand), index);
+}
+
+bool Graph::checkOperandsForOperation(const IOperation &operation)
{
- assert(isBuildingPhase());
- return _operations.push(std::move(node));
+ auto inputs = operation.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+ auto outputs = operation.getOutputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+ for (auto &&input : inputs)
+ if (!operands().exist(input))
+ return false;
+ for (auto &&output : outputs)
+ if (!operands().exist(output))
+ return false;
+ return true;
+}
+
+void Graph::linkOperandToOperation(OperationIndex index, const IOperation &operation)
+{
+ auto inputs = operation.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+ auto outputs = operation.getOutputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+
+ for (auto &&input : inputs)
+ operands().at(input).insertUse(index);
+ for (auto &&output : outputs)
+ operands().at(output).setDef(index);
+}
+
+OperationIndex Graph::addOperation(std::unique_ptr<IOperation> &&operation)
+{
+ const IOperation &op_ref = *operation;
+ if (!checkOperandsForOperation(op_ref))
+ return OperationIndex{};
+ auto ind = _operations.push(std::move(operation));
+ if (ind.valid())
+ linkOperandToOperation(ind, op_ref);
+ return ind;
+}
+
+OperationIndex Graph::addOperation(OperationIndex index, std::unique_ptr<IOperation> &&operation)
+{
+ const IOperation &op_ref = *operation;
+ if (!checkOperandsForOperation(op_ref))
+ return OperationIndex{};
+ auto ind_gen = _operations.push(std::move(operation), index);
+ if (ind_gen.valid())
+ {
+ assert(ind_gen == index);
+ linkOperandToOperation(index, op_ref);
+ }
+ return index;
+}
+
+OperationIndex Graph::replaceOperation(OperationIndex index,
+ std::unique_ptr<IOperation> &&operation)
+{
+ const IOperation &op_ref = *operation;
+ if (!checkOperandsForOperation(op_ref) || !_operations.exist(index))
+ return OperationIndex{};
+
+ // Check the new operation has the same inputs/outputs as the existing operation
+ const auto &old_op = _operations.at(index);
+ if (!(old_op.getInputs() == op_ref.getInputs() && old_op.getOutputs() == op_ref.getOutputs()))
+ {
+ return OperationIndex{};
+ }
+
+ return _operations.set(index, std::move(operation));
}
void Graph::setOperandValue(const OperandIndex &ind, std::shared_ptr<Data> data)
{
- assert(isBuildingPhase());
assert(_operands.exist(ind));
_operands.at(ind).data(std::move(data));
}
+void Graph::changeShape(const OperandIndex &ind, const ir::Shape &new_shape)
+{
+ assert(_operands.exist(ind));
+ _operands.at(ind).info().shape(new_shape);
+}
+
void Graph::addInput(const OperandIndex &ind, const std::string &name)
{
- assert(isBuildingPhase());
if (!name.empty())
_name_to_input.emplace(name, IOIndex{_inputs.size()});
_inputs.append(ind);
@@ -66,7 +129,6 @@ void Graph::addInput(const OperandIndex &ind, const std::string &name)
void Graph::addOutput(const OperandIndex &ind, const std::string &name)
{
- assert(isBuildingPhase());
if (!name.empty())
_name_to_output.emplace(name, IOIndex{_outputs.size()});
_outputs.append(ind);
@@ -84,62 +146,70 @@ IOIndex Graph::getOutputIndex(const std::string &name) const
return (itr == _name_to_output.end()) ? IOIndex{} : itr->second;
}
-void Graph::finishBuilding(void)
+void Graph::verify(void) const
{
- assert(isBuildingPhase());
- _phase = Phase::MODEL;
-
- initializeUseDef();
- sweepGarbageOperands();
-
// Call graph verifications for the MODEL phase
{
- assert(verifier::DAGChecker().verify(*this));
- assert(verifier::EdgeConsistencyChecker().verify(*this));
+ // Except for edge consistency checks, the user may have given a bad model,
+ // so this throws an exception rather than asserting.
+ if (!verifier::InputOutputChecker().verify(*this))
+ throw std::runtime_error{"One of model input and output operands does not exist."};
+ if (!verifier::DAGChecker().verify(*this))
+ throw std::runtime_error{"The graph is cyclic."};
+ assert(verifier::EdgeChecker().verify(*this));
}
+
+ // Check shape independent operation feature
+ // - Operand type
+ // - Shape independent parameter
+ OperationValidator{*this}();
}
void Graph::initializeUseDef()
{
- operations().iterate([&](const OperationIndex &index, const Operation &node) -> void {
- auto outputs = node.getOutputs();
- for (auto output : outputs)
+ operations().iterate([&](const OperationIndex &index, const IOperation &node) -> void {
+ const auto &outputs = node.getOutputs();
+ for (auto &&output : outputs | ir::Remove::UNDEFINED)
{
operands().at(output).setDef(index);
}
- for (auto input : node.getInputs() | ir::Remove::UNDEFINED)
+ for (auto &&input : node.getInputs() | ir::Remove::UNDEFINED)
{
operands().at(input).insertUse(index);
}
});
}
-void Graph::sweepGarbageOperands()
+std::vector<ir::OperationIndex> Graph::topolSortOperations() const
{
- // Remove operands that are not used by any operations, except Graph inputs/outputs
- ir::OperandIndexMap<bool> visited;
-
- operations().iterate([&](const OperationIndex &, const Operation &node) {
- for (auto ind : node.getInputs() + node.getOutputs())
- {
- visited[ind] = true;
- }
- });
-
- // Graph's inputs/outputs are always reachable
- for (auto ind : getInputs() + getOutputs())
- {
- visited[ind] = true;
- }
-
- operands().iterate([&](const OperandIndex &ind, const Operand &) {
- if (!visited[ind])
+ std::vector<ir::OperationIndex> ret;
+ util::Set<ir::OperationIndex> unvisited;
+ operations().iterate(
+ [&](const ir::OperationIndex &index, const ir::IOperation &) { unvisited.add(index); });
+
+ std::function<void(const ir::OperationIndex &, const ir::IOperation &)> dfs =
+ [&](const ir::OperationIndex &index, const ir::IOperation &op) -> void {
+ if (!unvisited.contains(index))
+ return;
+ unvisited.remove(index);
+
+ for (const auto &output : op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
- VERBOSE(Graph::sweepGarbageOperands) << "Sweep garbage operand " << ind.value() << std::endl;
- operands().remove(ind);
+ const auto &operand = operands().at(output);
+ for (const auto &use : operand.getUses())
+ {
+ dfs(use, operations().at(use));
+ }
}
- });
+ ret.push_back(index);
+ };
+ operations().iterate(dfs);
+
+ assert(unvisited.empty()); // All of the nodes must have been visited
+ // Reverse the postorder DFS result so it is sorted in topological order
+ std::reverse(ret.begin(), ret.end());
+ return ret;
}
} // namespace ir
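
The new Graph::topolSortOperations above is a reverse-postorder DFS over def-use edges. Below is a minimal standalone sketch of the same ordering on plain integer node ids; the node list and adjacency map are assumptions for illustration, not onert types.

#include <algorithm>
#include <functional>
#include <unordered_map>
#include <unordered_set>
#include <vector>

// Postorder DFS from each unvisited node, following edges to the nodes that
// consume its outputs, then reverse the result.
std::vector<int> topolSort(const std::vector<int> &nodes,
                           const std::unordered_map<int, std::vector<int>> &users)
{
  std::vector<int> ret;
  std::unordered_set<int> visited;

  std::function<void(int)> dfs = [&](int n) {
    if (!visited.insert(n).second)
      return; // already visited
    auto it = users.find(n);
    if (it != users.end())
      for (int use : it->second)
        dfs(use);
    ret.push_back(n); // postorder: a node is emitted only after all of its users
  };
  for (int n : nodes)
    dfs(n);

  std::reverse(ret.begin(), ret.end()); // reverse postorder == producers first
  return ret;
}

Reversing the postorder guarantees that every producer appears before all of its consumers, which is the property later compiler passes rely on.
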
diff --git a/runtime/onert/core/src/ir/Graph.test.cc b/runtime/onert/core/src/ir/Graph.test.cc
new file mode 100644
index 000000000..144500745
--- /dev/null
+++ b/runtime/onert/core/src/ir/Graph.test.cc
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/Graph.h"
+#include "ir/operation/BinaryArithmetic.h"
+
+#include <gtest/gtest.h>
+
+TEST(Graph, neg_inputs_and_outputs)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::OperandIndex index0{0u};
+ onert::ir::OperandIndex index1{1u};
+
+ graph.addInput({index0});
+ graph.addInput({index1});
+
+ onert::ir::OperandIndex index10{10u};
+ onert::ir::OperandIndex index11{11u};
+ onert::ir::OperandIndex index12{12u};
+
+ graph.addOutput({index10});
+ graph.addOutput({index11});
+ graph.addOutput({index12});
+
+ ASSERT_EQ(graph.getInputs().size(), 2);
+ ASSERT_EQ(graph.getOutputs().size(), 3);
+
+ onert::ir::IOIndex io_index0{0};
+ onert::ir::IOIndex io_index1{1};
+ onert::ir::IOIndex io_index2{2};
+
+ ASSERT_EQ(graph.getInputs().at(io_index0), 0);
+ ASSERT_EQ(graph.getInputs().at(io_index1), 1);
+
+ ASSERT_EQ(graph.getOutputs().at(io_index0), 10);
+ ASSERT_EQ(graph.getOutputs().at(io_index1), 11);
+ ASSERT_EQ(graph.getOutputs().at(io_index2), 12);
+
+ EXPECT_THROW(graph.getOutputs().at(onert::ir::IOIndex{3}), std::out_of_range);
+}
+
+using namespace onert::ir;
+
+OperationIndex addAddOperation(Graph &graph, const OperandIndexSequence inputs,
+ const OperandIndexSequence outputs)
+{
+ // Add "ADD" operation
+ operation::BinaryArithmetic::Param param;
+ param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+ param.activation = Activation::NONE;
+ return graph.addOperation(std::make_unique<operation::BinaryArithmetic>(inputs, outputs, param));
+}
+
+TEST(Graph, OneOpGraphSimpleValid)
+{
+ // Simple Graph with just one Add operation
+
+ Graph graph;
+
+ // Add tensors
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ auto lhs = graph.addOperand(shape, type);
+ auto rhs = graph.addOperand(shape, type);
+ auto res = graph.addOperand(shape, type);
+
+ addAddOperation(graph, {lhs, rhs}, {res});
+
+ // Set model inputs/outputs
+ graph.addInput(lhs);
+ graph.addInput(rhs);
+ graph.addOutput(res);
+
+ graph.verify();
+
+ SUCCEED();
+}
+
+TEST(Graph, neg_InvalidGraph_BadInput)
+{
+ Graph graph;
+
+ // Add tensors
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ auto in = graph.addOperand(shape, type);
+ auto out = graph.addOperand(shape, type);
+
+ // Set model inputs/outputs
+ graph.addInput(in);
+ graph.addOutput(out);
+ graph.addInput(OperandIndex{89}); // Non-existent operand!
+
+ EXPECT_ANY_THROW(graph.verify());
+}
+
+TEST(Graph, neg_InvalidGraph_BadOutput)
+{
+ Graph graph;
+
+ // Add tensors
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ auto in = graph.addOperand(shape, type);
+ auto out = graph.addOperand(shape, type);
+
+ // Set model inputs/outputs
+ graph.addInput(in);
+ graph.addOutput(out);
+ graph.addOutput(OperandIndex{12}); // Non-existent operand!
+
+ EXPECT_ANY_THROW(graph.verify());
+}
+
+TEST(Graph, neg_InvalidAddOperation_BadInputIndex)
+{
+ Graph graph;
+
+ // Add tensors
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ auto lhs = graph.addOperand(shape, type);
+ auto rhs = graph.addOperand(shape, type);
+ auto res = graph.addOperand(shape, type);
+
+ // Set model inputs/outputs
+ graph.addInput(lhs);
+ graph.addInput(rhs);
+ graph.addOutput(res);
+
+ ASSERT_FALSE(addAddOperation(graph, {lhs, OperandIndex{99}}, {res}).valid());
+}
diff --git a/runtime/onert/core/src/ir/GraphIterator.cc b/runtime/onert/core/src/ir/GraphIterator.cc
deleted file mode 100644
index 4bea1a55d..000000000
--- a/runtime/onert/core/src/ir/GraphIterator.cc
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GraphIterator.h"
-
-#include "ir/OperationIndexMap.h"
-#include "compiler/LoweredGraph.h"
-
-namespace onert
-{
-namespace ir
-{
-
-//
-// Graph::DefaultIterator
-//
-
-template <bool is_const>
-void DefaultIterator<is_const>::iterate(GraphRef graph, const IterFn &fn) const
-{
- graph.operations().iterate(
- [&](const OperationIndex &index, NodeRef node) -> void { fn(index, node); });
-}
-
-//
-// Graph::PostDfsIterator
-//
-
-template <bool is_const>
-void PostDfsIterator<is_const>::iterate(GraphRef graph, const IterFn &fn) const
-{
- assert(!graph.isBuildingPhase()); // Restrict iteration condition
-
- OperationIndexMap<bool> visited;
- graph.operations().iterate([&](const OperationIndex &index, NodeRef) { visited[index] = false; });
-
- std::function<void(const OperationIndex &, NodeRef)> dfs_recursive =
- [&](const OperationIndex &index, NodeRef node) -> void {
- if (visited[index])
- return;
- visited[index] = true;
-
- for (const auto output : node.getOutputs() | Remove::DUPLICATED)
- {
- const auto &operand = graph.operands().at(output);
- for (const auto &use : operand.getUses())
- {
- dfs_recursive(use, graph.operations().at(use));
- }
- }
-
- fn(index, node);
- };
-
- graph.operations().iterate(dfs_recursive);
-
- // All of the operations(nodes) must have been visited.
- assert(std::all_of(visited.begin(), visited.end(),
- [](const std::pair<const OperationIndex, bool> &v) { return v.second; }));
-}
-
-template <bool is_const>
-void PostDfsIterator<is_const>::iterateOpSeqs(LoweredGraphRef lowered_graph,
- const OpSeqIterFn &fn) const
-{
- std::unordered_map<OpSequenceIndex, bool> visited;
- lowered_graph.op_seqs().iterate(
- [&](const OpSequenceIndex &index, OpSequenceRef) { visited[index] = false; });
-
- std::function<void(const OpSequenceIndex &, OpSequenceRef)> dfs_recursive =
- [&](const OpSequenceIndex &index, OpSequenceRef op_seq) -> void {
- if (visited[index])
- return;
- visited[index] = true;
-
- for (const auto output : op_seq.getOutputs() | Remove::DUPLICATED)
- {
- const auto &operand = lowered_graph.graph().operands().at(output);
- for (const auto &use : operand.getUses())
- {
- const auto use_op_seq_index = lowered_graph.op_seqs().getOperation(use);
- dfs_recursive(use_op_seq_index, lowered_graph.op_seqs().at(use_op_seq_index));
- }
- }
-
- fn(index, op_seq);
- };
-
- lowered_graph.op_seqs().iterate(dfs_recursive);
-
- // All of the operations(nodes) must have been visited.
- assert(std::all_of(visited.begin(), visited.end(),
- [](const std::pair<const OpSequenceIndex, bool> &v) { return v.second; }));
-}
-
-// Explicit instantiations to have implementation in the source file.
-// NOTE If these instatiations were in the top of this file, `iterate` is compiled and saved in
-// `GraphIterator.cc.o` but `iterateOpSeqs`. This happens only when cross-building for Android.
-// (Maybe a bug of NDK toolchain(clang)?)
-
-template class DefaultIterator<true>;
-template class DefaultIterator<false>;
-
-template class PostDfsIterator<true>;
-template class PostDfsIterator<false>;
-
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/GraphIterator.h b/runtime/onert/core/src/ir/GraphIterator.h
deleted file mode 100644
index b54314e0e..000000000
--- a/runtime/onert/core/src/ir/GraphIterator.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_GRAPH_ITERATOR_H__
-#define __ONERT_IR_GRAPH_ITERATOR_H__
-
-#include <type_traits>
-
-#include "ir/Index.h"
-
-namespace onert
-{
-namespace compiler
-{
-class LoweredGraph;
-} // namespace compiler
-} // namespace onert
-
-namespace onert
-{
-namespace ir
-{
-
-class Graph;
-class Operation;
-class OpSequence;
-
-template <bool is_const> class Iterator
-{
-public:
- using GraphRef = typename std::conditional<is_const, const Graph &, Graph &>::type;
- using IndexRef = const OperationIndex &;
- using NodeRef = typename std::conditional<is_const, const Operation &, Operation &>::type;
- using IterFn = std::function<void(IndexRef, NodeRef)>;
-
-public:
- virtual ~Iterator() = default;
- virtual void iterate(GraphRef graph, const IterFn &fn) const = 0;
-};
-
-template <bool is_const = false> class DefaultIterator final : public Iterator<is_const>
-{
-public:
- using GraphRef = typename Iterator<is_const>::GraphRef;
- using IndexRef = typename Iterator<is_const>::IndexRef;
- using NodeRef = typename Iterator<is_const>::NodeRef;
- using IterFn = typename Iterator<is_const>::IterFn;
-
-public:
- void iterate(GraphRef graph, const IterFn &fn) const;
-};
-using DefaultConstIterator = DefaultIterator<true>;
-
-template <bool is_const = false> class PostDfsIterator final : public Iterator<is_const>
-{
-public:
- using GraphRef = typename Iterator<is_const>::GraphRef;
- using IndexRef = typename Iterator<is_const>::IndexRef;
- using NodeRef = typename Iterator<is_const>::NodeRef;
- using IterFn = typename Iterator<is_const>::IterFn;
- using LoweredGraphRef =
- typename std::conditional<is_const, const typename compiler::LoweredGraph &,
- typename compiler::LoweredGraph &>::type;
- using OpSequenceRef = typename std::conditional<is_const, const OpSequence &, OpSequence &>::type;
- using OpSeqIndexRef = const OpSequenceIndex &;
- using OpSeqIterFn = std::function<void(OpSeqIndexRef, OpSequenceRef)>;
-
-public:
- void iterate(GraphRef graph, const IterFn &fn) const;
- void iterateOpSeqs(LoweredGraphRef lowered_graph, const OpSeqIterFn &f) const;
-};
-using PostDfsConstIterator = PostDfsIterator<true>;
-
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_GRAPH_ITERATOR_H__
diff --git a/runtime/onert/core/src/ir/LayoutSet.cc b/runtime/onert/core/src/ir/LayoutSet.cc
index bd3f438ad..732460aa2 100644
--- a/runtime/onert/core/src/ir/LayoutSet.cc
+++ b/runtime/onert/core/src/ir/LayoutSet.cc
@@ -23,7 +23,7 @@ namespace ir
LayoutSet::LayoutSet(std::initializer_list<Layout> layouts)
{
- for (auto layout : layouts)
+ for (auto &&layout : layouts)
{
_set.insert(layout);
}
@@ -32,7 +32,7 @@ LayoutSet::LayoutSet(std::initializer_list<Layout> layouts)
LayoutSet LayoutSet::operator|(const LayoutSet &other) const
{
auto ret = *this;
- for (auto layout : other)
+ for (auto &&layout : other)
{
ret.add(layout);
}
@@ -42,7 +42,7 @@ LayoutSet LayoutSet::operator|(const LayoutSet &other) const
LayoutSet LayoutSet::operator&(const LayoutSet &other) const
{
LayoutSet ret;
- for (auto layout : other)
+ for (auto &&layout : other)
{
if (contains(layout))
{
@@ -55,7 +55,7 @@ LayoutSet LayoutSet::operator&(const LayoutSet &other) const
LayoutSet LayoutSet::operator-(const LayoutSet &other) const
{
auto ret = *this;
- for (auto layout : other)
+ for (auto &&layout : other)
{
ret.remove(layout);
}
diff --git a/runtime/onert/core/src/ir/LayoutSet.h b/runtime/onert/core/src/ir/LayoutSet.h
index 6ce4e38c6..be077f2f0 100644
--- a/runtime/onert/core/src/ir/LayoutSet.h
+++ b/runtime/onert/core/src/ir/LayoutSet.h
@@ -17,6 +17,7 @@
#ifndef __ONERT_IR_LAYOUT_SET_H__
#define __ONERT_IR_LAYOUT_SET_H__
+#include <cstdint>
#include <initializer_list>
#include <unordered_set>
diff --git a/runtime/onert/core/src/ir/LayoutSet.test.cc b/runtime/onert/core/src/ir/LayoutSet.test.cc
new file mode 100644
index 000000000..fc956abe8
--- /dev/null
+++ b/runtime/onert/core/src/ir/LayoutSet.test.cc
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "LayoutSet.h"
+
+#include <gtest/gtest.h>
+
+using onert::ir::Layout;
+using onert::ir::LayoutSet;
+
+TEST(ir_LayoutSet, neg_add_remove)
+{
+ LayoutSet set{Layout::NCHW};
+ set.remove(Layout::NHWC);
+ ASSERT_EQ(set.size(), 1);
+ set.add(Layout::NHWC);
+ ASSERT_EQ(set.size(), 2);
+ set.remove(Layout::NHWC);
+ ASSERT_EQ(set.size(), 1);
+ set.remove(Layout::NCHW);
+ ASSERT_EQ(set.size(), 0);
+ set.remove(Layout::NCHW);
+ ASSERT_EQ(set.size(), 0);
+}
+
+TEST(ir_LayoutSet, neg_add_twice)
+{
+ LayoutSet set;
+ set.add(Layout::NHWC);
+ ASSERT_EQ(set.size(), 1);
+ set.add(Layout::NHWC);
+ ASSERT_EQ(set.size(), 1);
+}
+
+TEST(ir_LayoutSet, set_operators)
+{
+ LayoutSet set1{Layout::NCHW};
+ LayoutSet set2{Layout::NHWC};
+ LayoutSet set3 = set1 | set2;
+
+ ASSERT_EQ(set3.size(), 2);
+
+ ASSERT_EQ((set3 - set1).size(), 1);
+ ASSERT_EQ((set3 - set1).contains(Layout::NHWC), true);
+ ASSERT_EQ((set3 - set2).size(), 1);
+ ASSERT_EQ((set3 - set2).contains(Layout::NCHW), true);
+ ASSERT_EQ((set3 - set3).size(), 0);
+
+ ASSERT_EQ((set3 & set1).size(), 1);
+ ASSERT_EQ((set3 & set1).contains(Layout::NCHW), true);
+ ASSERT_EQ((set3 & set2).size(), 1);
+ ASSERT_EQ((set3 & set2).contains(Layout::NHWC), true);
+ ASSERT_EQ((set1 & set2).size(), 0);
+}
diff --git a/runtime/onert/core/src/ir/MockNode.h b/runtime/onert/core/src/ir/MockNode.h
new file mode 100644
index 000000000..0e7ed977b
--- /dev/null
+++ b/runtime/onert/core/src/ir/MockNode.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_TEST_GRAPH_MOCK_NODE_H__
+#define __ONERT_TEST_GRAPH_MOCK_NODE_H__
+
+#include "ir/Operation.h"
+#include "ir/OperandIndexSequence.h"
+
+namespace onert_test
+{
+namespace ir
+{
+
+class SimpleMock : public onert::ir::Operation
+{
+public:
+ SimpleMock(const onert::ir::OperandIndexSequence &inputs,
+ const onert::ir::OperandIndexSequence &outputs)
+ : Operation{onert::ir::OperandConstraint::createAny()}
+ {
+ setInputs(inputs);
+ setOutputs(outputs);
+ }
+
+public:
+ void accept(onert::ir::OperationVisitor &) const override {}
+ onert::ir::OpCode opcode() const final { return onert::ir::OpCode::Invalid; }
+};
+
+} // namespace ir
+} // namespace onert_test
+
+#endif // __ONERT_TEST_GRAPH_MOCK_NODE_H__
diff --git a/runtime/onert/core/src/ir/OpSequence.cc b/runtime/onert/core/src/ir/OpSequence.cc
deleted file mode 100644
index e2b989d8c..000000000
--- a/runtime/onert/core/src/ir/OpSequence.cc
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/OpSequence.h"
-
-#include "ir/Operations.h"
-#include "ir/OperationVisitor.h"
-#include <sstream>
-
-namespace
-{
-
-std::string getStrFromIndice(const onert::ir::OperandIndexSequence &indice)
-{
- std::string str;
- for (const auto &ind : indice)
- {
- str += std::to_string(ind.value());
- str.push_back(',');
- }
- if (str.back() == ',')
- str.pop_back();
-
- return str;
-}
-}
-
-namespace onert
-{
-namespace ir
-{
-
-OpSequence::OpSequence(Layout layout) : _layout{layout}, _has_dynamic_tensor{false}
-{
- // DO NOTHING
-}
-
-void OpSequence::accept(OperationVisitor &v) const { v.visit(*this); }
-
-// TODO: Impl Dumper instead of this method
-std::string getStrFromOpSeq(const OpSequence &op_seq, const Operations &operations)
-{
- // " OpSequence IN(0,1,2) -> { op0(0,1,2:3), op1(3:4), op2(4:5) } -> OUT(5)"
- std::stringstream ss;
- ss << " OpSequence IN(" << getStrFromIndice(op_seq.getInputs()) << ") -> {";
- for (const auto &op_idx : op_seq)
- {
- ss << " " << op_idx.value() << "(" << operations.at(op_idx).name() << ":"
- << getStrFromIndice(operations.at(op_idx).getInputs()) << ":"
- << getStrFromIndice(operations.at(op_idx).getOutputs()) << ")";
- }
- ss << " } -> OUT(" << getStrFromIndice(op_seq.getOutputs()) << ")";
- return ss.str();
-}
-
-void OpSequence::remove(const OperationIndex &index)
-{
- assert(exist(index));
- for (auto it = _operations.cbegin(); it != _operations.cend(); ++it)
- {
- if (*it == index)
- {
- _operations.erase(it);
- break;
- }
- }
-}
-
-bool OpSequence::exist(const OperationIndex &index) const
-{
- for (const auto &inner_op_idx : _operations)
- {
- if (inner_op_idx == index)
- {
- return true;
- }
- }
- return false;
-}
-
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/OpSequences.cc b/runtime/onert/core/src/ir/OpSequences.cc
deleted file mode 100644
index 68884783e..000000000
--- a/runtime/onert/core/src/ir/OpSequences.cc
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/OpSequences.h"
-#include "util/logging.h"
-#include <memory>
-
-#include <cassert>
-#include <string>
-
-namespace onert
-{
-namespace ir
-{
-
-OpSequenceIndex OpSequences::emplace(const OperationIndex &index, Layout layout)
-{
- std::unique_ptr<OpSequence> op_seq = std::make_unique<OpSequence>(layout);
- op_seq->appendOperation(index);
- const OpSequenceIndex &seq_index = push(std::move(op_seq));
- cacheSequenceIndex(seq_index, index);
- return seq_index;
-}
-
-OpSequenceIndex OpSequences::emplace(std::unique_ptr<OpSequence> &&op_seq)
-{
- auto &operations = op_seq->operations();
- const OpSequenceIndex &seq_index = push(std::move(op_seq));
- for (const auto &op_idx : operations)
- {
- cacheSequenceIndex(seq_index, op_idx);
- }
- return seq_index;
-}
-
-void OpSequences::cacheSequenceIndex(const OpSequenceIndex &seq_index,
- const OperationIndex &op_index) const
-{
- _seq_indexes.emplace(op_index, seq_index);
-}
-
-OpSequenceIndex *OpSequences::findSequenceIndex(const OperationIndex &operation_index) const
-{
- // If opration_index is cached, return sequence_index from cache
- if (_seq_indexes.count(operation_index))
- {
- auto &op_seq_index = _seq_indexes.at(operation_index);
- if (_objects.count(op_seq_index) && _objects.at(op_seq_index)->exist(operation_index))
- {
- return &op_seq_index;
- }
- else
- {
- _seq_indexes.erase(operation_index);
- return nullptr;
- }
- }
- return nullptr;
-}
-
-bool OpSequences::containsOperation(const OperationIndex &operation_index) const
-{
- return findOperation(operation_index).valid();
-}
-
-OpSequenceIndex OpSequences::getOperation(const OperationIndex &operation_index) const
-{
- OpSequenceIndex ret = findOperation(operation_index);
- assert(ret.valid());
- return ret;
-}
-
-void OpSequences::removeFromOpSequence(const OperationIndex &operation_index)
-{
- const auto op_seq_index = findOperation(operation_index);
- auto &op_seq = at(op_seq_index);
- _seq_indexes.erase(operation_index);
- op_seq.remove(operation_index);
- if (op_seq.size() == 0)
- {
- remove(op_seq_index);
- }
-}
-
-OpSequenceIndex OpSequences::findOperation(const OperationIndex &operation_index) const
-{
- if (OpSequenceIndex *op_seq_index = findSequenceIndex(operation_index))
- return *op_seq_index;
-
- for (auto &e : _objects)
- {
- OpSequence &object = *e.second;
- auto it = find(object.operations().begin(), object.operations().end(), operation_index);
- if (it != object.operations().end())
- {
- cacheSequenceIndex(e.first, operation_index);
- return e.first;
- }
- }
- throw std::runtime_error("Operation not found");
-}
-
-void dumpOpSequences(const OpSequences &op_seqs, const Operations &operations)
-{
- op_seqs.iterate([&](const OpSequenceIndex &idx, const OpSequence &op_seq) {
- VERBOSE(OpSequences) << idx.value() << "] " << getStrFromOpSeq(op_seq, operations) << std::endl;
- });
-}
-
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/Operand.cc b/runtime/onert/core/src/ir/Operand.cc
index e29c7a6ec..18981dbf1 100644
--- a/runtime/onert/core/src/ir/Operand.cc
+++ b/runtime/onert/core/src/ir/Operand.cc
@@ -46,5 +46,11 @@ void Operand::setDef(const OperationIndex &idx) { _def = idx; }
void Operand::unsetDef() { _def = OperationIndex{}; }
+void Operand::clearDefUse()
+{
+ unsetDef();
+ _uses.clear();
+}
+
} // namespace ir
} // namespace onert
diff --git a/runtime/onert/core/src/ir/Operand.test.cc b/runtime/onert/core/src/ir/Operand.test.cc
new file mode 100644
index 000000000..0b858792a
--- /dev/null
+++ b/runtime/onert/core/src/ir/Operand.test.cc
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/Graph.h"
+
+#include "MockNode.h"
+#include "verifier/Verifier.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <typeindex>
+
+namespace
+{
+
+using IndexSet = onert::ir::OperandIndexSequence;
+using Mock = onert_test::ir::SimpleMock;
+
+} // namespace
+
+TEST(ir_Operand, neg_usedef)
+{
+ onert::ir::Graph graph;
+ onert::ir::verifier::DAGChecker verifier;
+
+ onert::ir::Shape shape(3);
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ // Model Input/Output
+ auto input_operand = graph.addOperand(shape, type);
+ auto output_operand = graph.addOperand(shape, type);
+
+ graph.addInput(input_operand);
+ graph.addOutput(output_operand);
+
+ // MockNode1
+ auto operand_index1 = graph.addOperand(shape, type);
+ auto mocknode_index1 =
+ graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index1}));
+
+ // MockNode2
+ auto operand_index2 = graph.addOperand(shape, type);
+ auto mocknode_index2 =
+ graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index2}));
+
+ // MockNode3(two input)
+ auto multiinput_index = graph.addOperation(
+ std::make_unique<Mock>(IndexSet{operand_index1, operand_index2}, IndexSet{output_operand}));
+
+ graph.verify();
+
+ ASSERT_TRUE(verifier.verify(graph));
+
+ // Check def
+ ASSERT_EQ(graph.operands().at(operand_index1).getDef(), mocknode_index1);
+ ASSERT_EQ(graph.operands().at(operand_index2).getDef(), mocknode_index2);
+ ASSERT_EQ(graph.operands().at(output_operand).getDef(), multiinput_index);
+
+ ASSERT_NE(graph.operands().at(operand_index1).getDef(), mocknode_index2);
+ ASSERT_NE(graph.operands().at(operand_index1).getDef(), multiinput_index);
+
+ // Check use
+ ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index1), true);
+ ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index2), true);
+ ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(multiinput_index), false);
+ ASSERT_EQ(graph.operands().at(operand_index1).getUses().contains(multiinput_index), true);
+ ASSERT_EQ(graph.operands().at(operand_index2).getUses().contains(multiinput_index), true);
+
+ ASSERT_EQ(graph.operands().at(input_operand).getUses().size(), 2);
+ ASSERT_EQ(graph.operands().at(operand_index1).getUses().size(), 1);
+ ASSERT_EQ(graph.operands().at(output_operand).getUses().size(), 0);
+}
diff --git a/runtime/onert/core/src/ir/OperandIndexSequence.cc b/runtime/onert/core/src/ir/OperandIndexSequence.cc
index 73f928280..a15b6d0d6 100644
--- a/runtime/onert/core/src/ir/OperandIndexSequence.cc
+++ b/runtime/onert/core/src/ir/OperandIndexSequence.cc
@@ -31,7 +31,7 @@ OperandIndexSequence::OperandIndexSequence(std::initializer_list<OperandIndex> l
OperandIndexSequence::OperandIndexSequence(std::initializer_list<int32_t> list)
{
- for (auto val : list)
+ for (auto &&val : list)
{
_vec.emplace_back(static_cast<uint32_t>(val));
}
@@ -39,7 +39,7 @@ OperandIndexSequence::OperandIndexSequence(std::initializer_list<int32_t> list)
OperandIndexSequence::OperandIndexSequence(std::initializer_list<uint32_t> list)
{
- for (auto val : list)
+ for (auto &&val : list)
{
_vec.emplace_back(val);
}
@@ -55,6 +55,11 @@ void OperandIndexSequence::replace(const OperandIndex &from, const OperandIndex
std::replace(_vec.begin(), _vec.end(), from, to);
}
+bool OperandIndexSequence::operator==(const OperandIndexSequence &other) const
+{
+ return _vec == other._vec;
+}
+
OperandIndexSequence OperandIndexSequence::operator+(const OperandIndexSequence &other) const
{
OperandIndexSequence ret = *this;
@@ -62,10 +67,10 @@ OperandIndexSequence OperandIndexSequence::operator+(const OperandIndexSequence
return ret;
}
-std::ostream &operator<<(std::ostream &o, const OperandIndexSequence &op_seq)
+std::ostream &operator<<(std::ostream &o, const OperandIndexSequence &operand_seq)
{
std::string delimeter;
- for (const auto &ind : op_seq._vec)
+ for (const auto &ind : operand_seq._vec)
{
o << delimeter << ind;
delimeter = ',';
diff --git a/runtime/onert/core/src/ir/OperandIndexSequence.test.cc b/runtime/onert/core/src/ir/OperandIndexSequence.test.cc
new file mode 100644
index 000000000..588c4e419
--- /dev/null
+++ b/runtime/onert/core/src/ir/OperandIndexSequence.test.cc
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/OperandIndexSequence.h"
+
+#include <gtest/gtest.h>
+
+using onert::ir::OperandIndex;
+using onert::ir::OperandIndexSequence;
+
+TEST(ir_OperandIndexSequence, neg_append)
+{
+ OperandIndexSequence iset{0, 2, 4, 8};
+
+ ASSERT_EQ(iset.size(), 4);
+
+ iset.append(OperandIndex{10});
+
+ ASSERT_EQ(iset.size(), 5);
+
+ onert::ir::IOIndex index1{1};
+ onert::ir::IOIndex index2{4};
+
+ ASSERT_EQ(iset.at(index1), 2);
+ ASSERT_EQ(iset.at(index2), 10);
+
+ ASSERT_TRUE(iset.contains(OperandIndex{2}));
+ ASSERT_TRUE(iset.contains(OperandIndex{10}));
+ ASSERT_FALSE(iset.contains(OperandIndex{11}));
+}
+
+TEST(graph_OperandIndexSequence, neg_replace)
+{
+ OperandIndexSequence iset{0, 1, 2, 3};
+
+ iset.replace(OperandIndex{1}, OperandIndex{9});
+ ASSERT_FALSE(iset.contains(OperandIndex{1}));
+ ASSERT_TRUE(iset.contains(OperandIndex{9}));
+}
diff --git a/runtime/onert/core/src/ir/Operands.cc b/runtime/onert/core/src/ir/Operands.cc
index ab32e478a..f8cfd16ef 100644
--- a/runtime/onert/core/src/ir/Operands.cc
+++ b/runtime/onert/core/src/ir/Operands.cc
@@ -29,7 +29,7 @@ Operands::Operands(const Operands &obj)
obj.iterate([&](const OperandIndex &index, const Operand &operand) {
_objects.emplace(index, std::make_unique<Operand>(operand));
});
- _index_count = obj._index_count;
+ _next_index = obj._next_index;
}
} // namespace ir
diff --git a/runtime/onert/core/src/ir/Operands.test.cc b/runtime/onert/core/src/ir/Operands.test.cc
new file mode 100644
index 000000000..aff228b10
--- /dev/null
+++ b/runtime/onert/core/src/ir/Operands.test.cc
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/Operands.h"
+
+#include <gtest/gtest.h>
+
+TEST(ir_Operands, neg_set_test)
+{
+ onert::ir::Operands set;
+
+ onert::ir::Shape shape0{1, 2, 3};
+
+ onert::ir::Shape shape1(4);
+ shape1.dim(0) = 10;
+ shape1.dim(1) = 20;
+ shape1.dim(2) = 30;
+ shape1.dim(3) = 40;
+
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ set.emplace(shape0, type);
+ set.emplace(shape1, type);
+
+ ASSERT_EQ(set.exist(onert::ir::OperandIndex{0u}), true);
+ ASSERT_EQ(set.exist(onert::ir::OperandIndex{1u}), true);
+ ASSERT_EQ(set.exist(onert::ir::OperandIndex{2u}), false);
+
+ ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(0), 1);
+ ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(1), 2);
+ ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(2), 3);
+}
diff --git a/runtime/onert/core/src/ir/Operation.cc b/runtime/onert/core/src/ir/Operation.cc
index 04be8c0d9..64792525d 100644
--- a/runtime/onert/core/src/ir/Operation.cc
+++ b/runtime/onert/core/src/ir/Operation.cc
@@ -24,22 +24,33 @@ namespace ir
{
Operation::Operation(OperandConstraint input_constr, const OperandIndexSequence &inputs,
- const OperandIndexSequence &outputs)
- : _input_constr{input_constr}, _inputs{inputs}, _outputs{outputs}
+ const OperandIndexSequence &outputs, OperandConstraint output_constr)
+ : _input_constr{input_constr}, _output_constr{output_constr}
{
+ setInputs(inputs);
+ setOutputs(outputs);
}
-Operation::Operation(OperandConstraint input_constr) : _input_constr{input_constr} {}
+Operation::Operation(OperandConstraint input_constr, OperandConstraint output_constr)
+ : _input_constr{input_constr}, _output_constr{output_constr}
+{
+}
Operation::~Operation() = default;
void Operation::setInputs(const OperandIndexSequence &indexes)
{
- assert(_input_constr.check(indexes.size()));
+ if (!_input_constr.check(indexes.size()))
+ throw std::runtime_error{"Invalid number of input tensors for this operation."};
_inputs = indexes;
}
-void Operation::setOutputs(const OperandIndexSequence &indexes) { _outputs = indexes; }
+void Operation::setOutputs(const OperandIndexSequence &indexes)
+{
+ if (!_output_constr.check(indexes.size()))
+ throw std::runtime_error{"Invalid number of output tensors for this operation."};
+ _outputs = indexes;
+}
void Operation::replaceInputs(const OperandIndex &from, const OperandIndex &to)
{
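
Operation::setInputs/setOutputs above now report an arity violation with an exception instead of a debug-only assert. Here is a minimal standalone sketch of that check-then-throw pattern; the ArityConstraint and Op types are assumptions for illustration, not the onert OperandConstraint API.

#include <cstddef>
#include <stdexcept>
#include <utility>
#include <vector>

// An inclusive [min, max] bound on how many operands an operation accepts.
struct ArityConstraint
{
  std::size_t min, max;
  bool check(std::size_t n) const { return min <= n && n <= max; }
};

struct Op
{
  ArityConstraint input_constr;
  std::vector<int> inputs;

  void setInputs(std::vector<int> idx)
  {
    // Report a bad operand count to the caller instead of asserting.
    if (!input_constr.check(idx.size()))
      throw std::runtime_error{"Invalid number of input tensors for this operation."};
    inputs = std::move(idx);
  }
};

int main()
{
  Op conv{{3, 3}, {}};        // a fixed three-input operation
  conv.setInputs({8, 9, 10}); // ok
  try
  {
    conv.setInputs({8}); // wrong arity: surfaces as an exception, even in release builds
  }
  catch (const std::runtime_error &)
  {
    // handled by the caller, e.g. rejected at model load time
  }
  return 0;
}
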
diff --git a/runtime/onert/core/src/ir/Operation.test.cc b/runtime/onert/core/src/ir/Operation.test.cc
new file mode 100644
index 000000000..b3c4e852d
--- /dev/null
+++ b/runtime/onert/core/src/ir/Operation.test.cc
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/Graph.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexSequence.h"
+#include "ir/operation/Concat.h"
+#include "ir/operation/Conv2D.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <stdexcept>
+
+using Index = onert::ir::IOIndex;
+using IndexSet = onert::ir::OperandIndexSequence;
+
+TEST(ir_Operation_setIO, operation_setIO_conv)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ // Add Conv
+ using Graph = onert::ir::operation::Conv2D;
+
+ auto input_operand = graph.addOperand(shape, type);
+ auto kernel_operand = graph.addOperand(shape, type);
+ auto bias_operand = graph.addOperand(shape, type);
+ IndexSet inputs{input_operand, kernel_operand, bias_operand};
+
+ Graph::Param conv_params;
+ conv_params.padding.type = onert::ir::PaddingType::SAME;
+ conv_params.stride.horizontal = 1;
+ conv_params.stride.vertical = 1;
+ conv_params.activation = onert::ir::Activation::NONE;
+
+ auto output_operand = graph.addOperand(shape, type).value();
+ IndexSet outputs{output_operand};
+
+ auto conv = std::make_unique<Graph>(inputs, outputs, conv_params);
+
+ ASSERT_NE(conv, nullptr);
+ ASSERT_EQ(conv->getInputs().at(Index{0}).value(), inputs.at(0).value());
+ conv->setInputs({8, 9, 10});
+ ASSERT_NE(conv->getInputs().at(Index{0}).value(), inputs.at(0).value());
+ ASSERT_EQ(conv->getInputs().at(Index{0}).value(), 8);
+}
+
+TEST(ir_Operation_setIO, neg_operation_setIO_concat)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ using Graph = onert::ir::operation::Concat;
+
+ // Add Concat
+ IndexSet inputs;
+ for (int i = 0; i < 6; ++i)
+ {
+ inputs.append(graph.addOperand(shape, type));
+ }
+
+ Graph::Param concat_params{0};
+
+ auto output_operand = graph.addOperand(shape, type).value();
+ IndexSet outputs{output_operand};
+
+ auto concat = std::make_unique<Graph>(inputs, outputs, concat_params);
+
+ ASSERT_NE(concat, nullptr);
+ ASSERT_EQ(concat->getInputs().size(), 6);
+ ASSERT_EQ(concat->getInputs().at(Index{0}).value(), inputs.at(0).value());
+
+ concat->setInputs({80, 6, 9, 11});
+ ASSERT_EQ(concat->getInputs().size(), 4);
+ ASSERT_NE(concat->getInputs().at(Index{0}).value(), inputs.at(0).value());
+ ASSERT_EQ(concat->getInputs().at(Index{0}).value(), 80);
+ ASSERT_EQ(concat->getInputs().at(Index{2}).value(), 9);
+ ASSERT_THROW(concat->getInputs().at(Index{5}), std::out_of_range);
+}
diff --git a/runtime/onert/core/src/ir/OperationCloner.cc b/runtime/onert/core/src/ir/OperationCloner.cc
index b4e60f0bc..64e1cc807 100644
--- a/runtime/onert/core/src/ir/OperationCloner.cc
+++ b/runtime/onert/core/src/ir/OperationCloner.cc
@@ -23,6 +23,23 @@ namespace onert
namespace ir
{
+namespace
+{
+
+class OperationCloner : public OperationVisitor
+{
+public:
+#define OP(Name) void visit(const operation::Name &o) override;
+#include "ir/Operations.lst"
+#undef OP
+
+public:
+ std::unique_ptr<Operation> releaseClone();
+
+private:
+ std::unique_ptr<Operation> _return_op;
+};
+
#define OP(Name) \
void OperationCloner::visit(const operation::Name &o) \
{ \
@@ -38,5 +55,14 @@ std::unique_ptr<Operation> OperationCloner::releaseClone()
return std::move(_return_op);
}
+} // namespace
+
+std::unique_ptr<Operation> clone(const IOperation &operation)
+{
+ OperationCloner cloner;
+ operation.accept(cloner);
+ return cloner.releaseClone();
+}
+
} // namespace ir
} // namespace onert
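
The refactor above hides the visitor-based cloner in an anonymous namespace and exposes only a clone() free function. A minimal standalone sketch of the same pattern follows; the Node/Visitor/Add/Mul types are assumptions for illustration, not onert classes.

#include <memory>

struct Add;
struct Mul;

struct Visitor
{
  virtual ~Visitor() = default;
  virtual void visit(const Add &) = 0;
  virtual void visit(const Mul &) = 0;
};

struct Node
{
  virtual ~Node() = default;
  virtual void accept(Visitor &) const = 0;
};
struct Add : Node { void accept(Visitor &v) const override { v.visit(*this); } };
struct Mul : Node { void accept(Visitor &v) const override { v.visit(*this); } };

namespace
{
// The concrete cloner stays file-local; callers only see clone().
struct Cloner : Visitor
{
  std::unique_ptr<Node> result;
  void visit(const Add &o) override { result = std::make_unique<Add>(o); }
  void visit(const Mul &o) override { result = std::make_unique<Mul>(o); }
};
} // namespace

std::unique_ptr<Node> clone(const Node &n)
{
  Cloner c;
  n.accept(c);
  return std::move(c.result);
}
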
diff --git a/runtime/onert/core/src/ir/OperationCloner.h b/runtime/onert/core/src/ir/OperationCloner.h
index 0e8cda2a0..49297a05c 100644
--- a/runtime/onert/core/src/ir/OperationCloner.h
+++ b/runtime/onert/core/src/ir/OperationCloner.h
@@ -26,19 +26,7 @@ namespace onert
namespace ir
{
-class OperationCloner : public OperationVisitor
-{
-public:
-#define OP(Name) void visit(const operation::Name &o) override;
-#include "ir/Operations.lst"
-#undef OP
-
-public:
- std::unique_ptr<Operation> releaseClone();
-
-private:
- std::unique_ptr<Operation> _return_op;
-};
+std::unique_ptr<Operation> clone(const IOperation &operation);
} // namespace ir
} // namespace onert
diff --git a/runtime/onert/core/src/ir/OperationDumper.cc b/runtime/onert/core/src/ir/OperationDumper.cc
index 48361f464..5e6d700f3 100644
--- a/runtime/onert/core/src/ir/OperationDumper.cc
+++ b/runtime/onert/core/src/ir/OperationDumper.cc
@@ -29,19 +29,21 @@ using namespace operation;
namespace
{
-void dumpUnaryInputOp(const Operation &node, const std::string &adding_input = "")
+
+// Dump all inputs and outputs.
+// Use this function when no input or output needs special handling.
+void dumpOpGeneric(const Operation &node, const std::string &adding_input = "")
{
VERBOSE(LIR) << "* " << node.name() << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(0) << ") " << adding_input
- << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs() << ") " << adding_input << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs() << ")" << std::endl;
}
-void dumpBinaryInputOp(const Operation &node, const std::string &adding_input = "")
+void dumpUnaryInputOp(const Operation &node, const std::string &adding_input = "")
{
VERBOSE(LIR) << "* " << node.name() << std::endl;
- VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(0) << ", " << node.getInputs().at(0)
- << ") " << adding_input << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(0) << ") " << adding_input
+ << std::endl;
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
@@ -53,18 +55,6 @@ void dumpConvOp(const Operation &node, const std::string &padding_type)
<< node.getInputs().at(Conv2D::Input::BIAS) << ")" << std::endl;
VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
}
-
-void dumpPackingOp(const Operation &node)
-{
- VERBOSE(LIR) << "* " << node.name() << std::endl;
- std::string inputs;
- for (auto i : node.getInputs())
- {
- inputs += std::to_string(i.value()) + ",";
- }
- VERBOSE(LIR) << " - Inputs : Inputs(" << inputs << ")" << std::endl;
- VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
} // namespace
OperationDumper::OperationDumper(const std::string &start_msg)
@@ -72,41 +62,62 @@ OperationDumper::OperationDumper(const std::string &start_msg)
VERBOSE(LIR) << start_msg << std::endl;
}
-void OperationDumper::visit(const ArgMax &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const ArgMinMax &node)
+{
+ std::string min_max = node.param().is_arg_max ? "(Max)" : "(Min)";
+ VERBOSE(LIR) << "* " << node.name() << min_max << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ArgMinMax::INPUT) << ") Axis("
+ << node.getInputs().at(ArgMinMax::AXIS) << ") " << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+}
void OperationDumper::visit(const BatchToSpaceND &node)
{
std::string block_size =
- "BlockSize(" +
- std::to_string(node.getInputs().at(BatchToSpaceND::Input::BLOCK_SIZE).value()) + ")";
- dumpUnaryInputOp(node, block_size);
+ "BlockSize(" + std::to_string(node.getInputs().at(BatchToSpaceND::Input::BLOCK_SIZE).value()) +
+ ")";
+ dumpOpGeneric(node, block_size);
}
-void OperationDumper::visit(const BinaryArithmetic &node) { dumpBinaryInputOp(node); }
+void OperationDumper::visit(const BCQFullyConnected &node)
+{
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
+ VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(BCQFullyConnected::Input::INPUT)
+ << ") WeightsBinary("
+ << node.getInputs().at(BCQFullyConnected::Input::WEIGHTS_BINARY)
+ << ") WeightsScales("
+ << node.getInputs().at(BCQFullyConnected::Input::WEIGHTS_SCALES)
+ << ") WeightsClusters("
+ << node.getInputs().at(BCQFullyConnected::Input::WEIGHTS_CLUSTERS) << ") Bias("
+ << node.getInputs().at(BCQFullyConnected::Input::BIAS) << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
+}
+
+void OperationDumper::visit(const BinaryArithmetic &node) { dumpOpGeneric(node); }
-void OperationDumper::visit(const operation::BroadcastTo &node) { dumpBinaryInputOp(node); }
+void OperationDumper::visit(const operation::BroadcastTo &node) { dumpOpGeneric(node); }
-void OperationDumper::visit(const Comparison &node) { dumpBinaryInputOp(node); }
+void OperationDumper::visit(const Comparison &node) { dumpOpGeneric(node); }
-void OperationDumper::visit(const Concat &node) { dumpPackingOp(node); }
+void OperationDumper::visit(const Concat &node) { dumpOpGeneric(node); }
void OperationDumper::visit(const Conv2D &node)
{
std::string padding_type =
- node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
+ node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
dumpConvOp(node, padding_type);
}
-void OperationDumper::visit(const ConvertFp16ToFp32 &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const ConvertFp16ToFp32 &node) { dumpOpGeneric(node); }
-void OperationDumper::visit(const ConvertFp32ToFp16 &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const ConvertFp32ToFp16 &node) { dumpOpGeneric(node); }
-void OperationDumper::visit(const DepthToSpace &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const DepthToSpace &node) { dumpOpGeneric(node); }
void OperationDumper::visit(const DepthwiseConv2D &node)
{
std::string padding_type =
- node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
+ node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
dumpConvOp(node, padding_type);
}
@@ -122,12 +133,12 @@ void OperationDumper::visit(const ElementwiseActivation &node)
{
params = " alpha value(" + std::to_string(node.param().alpha) + ")";
}
- dumpUnaryInputOp(node, params);
+ dumpOpGeneric(node, params);
}
-void OperationDumper::visit(const ElementwiseBinary &node) { dumpBinaryInputOp(node); }
+void OperationDumper::visit(const ElementwiseBinary &node) { dumpOpGeneric(node); }
-void OperationDumper::visit(const ElementwiseUnary &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const ElementwiseUnary &node) { dumpOpGeneric(node); }
void OperationDumper::visit(const EmbeddingLookup &node)
{
@@ -141,22 +152,30 @@ void OperationDumper::visit(const EmbeddingLookup &node)
void OperationDumper::visit(const ExpandDims &node)
{
std::string axis =
- "AXIS(" + std::to_string(node.getInputs().at(ExpandDims::Input::AXIS).value()) + ")";
+ "AXIS(" + std::to_string(node.getInputs().at(ExpandDims::Input::AXIS).value()) + ")";
dumpUnaryInputOp(node, axis);
}
+void OperationDumper::visit(const Fill &node)
+{
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
+ VERBOSE(LIR) << " - Inputs : Shape(" << node.getInputs().at(Fill::Input::SHAPE) << ") Value("
+ << node.getInputs().at(Fill::Input::VALUE) << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+}
+
void OperationDumper::visit(const FullyConnected &node)
{
std::string inputs =
- "Weight(" + std::to_string(node.getInputs().at(FullyConnected::Input::WEIGHT).value()) +
- ") Bias(" + std::to_string(node.getInputs().at(FullyConnected::Input::BIAS).value()) + ")";
+ "Weight(" + std::to_string(node.getInputs().at(FullyConnected::Input::WEIGHT).value()) +
+ ") Bias(" + std::to_string(node.getInputs().at(FullyConnected::Input::BIAS).value()) + ")";
dumpUnaryInputOp(node, inputs);
}
void OperationDumper::visit(const Gather &node)
{
std::string indices =
- "Indices(" + std::to_string(node.getInputs().at(Gather::Input::INDICES).value()) + ")";
+ "Indices(" + std::to_string(node.getInputs().at(Gather::Input::INDICES).value()) + ")";
dumpUnaryInputOp(node, indices);
}
@@ -174,50 +193,70 @@ void OperationDumper::visit(const HashtableLookup &node)
void OperationDumper::visit(const InstanceNorm &node)
{
std::string inputs =
- "Gamma(" + std::to_string(node.getInputs().at(InstanceNorm::Input::GAMMA).value()) +
- ") Beta(" + std::to_string(node.getInputs().at(InstanceNorm::Input::BETA).value()) + ")";
+ "Gamma(" + std::to_string(node.getInputs().at(InstanceNorm::Input::GAMMA).value()) + ") Beta(" +
+ std::to_string(node.getInputs().at(InstanceNorm::Input::BETA).value()) + ")";
dumpUnaryInputOp(node, inputs);
}
-void OperationDumper::visit(const L2Normalization &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const L2Normalization &node) { dumpOpGeneric(node); }
+
+void OperationDumper::visit(const LocalResponseNormalization &node) { dumpOpGeneric(node); }
-void OperationDumper::visit(const LocalResponseNormalization &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const Loss &node)
+{
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
+ VERBOSE(LIR) << " - Inputs : Prediction(" << node.getInputs().at(Loss::Input::Y_PRED) << ") True("
+ << node.getInputs().at(Loss::Input::Y_TRUE) << ")" << std::endl;
+ VERBOSE(LIR) << " - Outputs : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+}
void OperationDumper::visit(const LSTM &node)
{
+ VERBOSE(LIR) << "* " << node.name() << std::endl;
VERBOSE(LIR)
- << " - Inputs : Input(" << node.getInputs().at(LSTM::Input::INPUT)
- << ") Input To Input Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_INPUT_WEIGHTS)
- << ") Input To Forget Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_FORGET_WEIGHTS)
- << ") Input To Cell Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_CELL_WEIGHTS)
- << ") Input To Output Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)
- << ") Recurrent To Input Weights("
- << node.getInputs().at(LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)
- << ") Recurrent To Forget Weights("
- << node.getInputs().at(LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)
- << ") Recurrent To Cell Weights("
- << node.getInputs().at(LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)
- << ") Recurrent To Output Weights("
- << node.getInputs().at(LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS) << ") Cell To Input Weights("
- << node.getInputs().at(LSTM::Input::CELL_TO_INPUT_WEIGHTS) << ") Cell To Forget Weights("
- << node.getInputs().at(LSTM::Input::CELL_TO_FORGET_WEIGHTS) << ") Cell To OUTPUT Weights("
- << node.getInputs().at(LSTM::Input::CELL_TO_OUTPUT_WEIGHTS) << ") Input Gate Bias("
- << node.getInputs().at(LSTM::Input::INPUT_GATE_BIAS) << ") Forget Gate Bias("
- << node.getInputs().at(LSTM::Input::FORGET_GATE_BIAS) << ") Cell Bias("
- << node.getInputs().at(LSTM::Input::CELL_BIAS) << ") Output Gate Bias("
- << node.getInputs().at(LSTM::Input::OUTPUT_GATE_BIAS) << ") Projection Weights("
- << node.getInputs().at(LSTM::Input::PROJECTION_WEIGHTS) << ") Projection Bias("
- << node.getInputs().at(LSTM::Input::PROJECTION_BIAS) << ") Output State In("
- << node.getInputs().at(LSTM::Input::OUTPUT_STATE_IN) << ") Cell State In("
- << node.getInputs().at(LSTM::Input::CELL_STATE_IN) << ")" << std::endl;
+ << " - Inputs : Input(" << node.getInputs().at(LSTM::Input::INPUT)
+ << ") Input To Input Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_INPUT_WEIGHTS)
+ << ") Input To Forget Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_FORGET_WEIGHTS)
+ << ") Input To Cell Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_CELL_WEIGHTS)
+ << ") Input To Output Weights(" << node.getInputs().at(LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)
+ << ") Recurrent To Input Weights("
+ << node.getInputs().at(LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)
+ << ") Recurrent To Forget Weights("
+ << node.getInputs().at(LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)
+ << ") Recurrent To Cell Weights(" << node.getInputs().at(LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)
+ << ") Recurrent To Output Weights("
+ << node.getInputs().at(LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS) << ") Cell To Input Weights("
+ << node.getInputs().at(LSTM::Input::CELL_TO_INPUT_WEIGHTS) << ") Cell To Forget Weights("
+ << node.getInputs().at(LSTM::Input::CELL_TO_FORGET_WEIGHTS) << ") Cell To OUTPUT Weights("
+ << node.getInputs().at(LSTM::Input::CELL_TO_OUTPUT_WEIGHTS) << ") Input Gate Bias("
+ << node.getInputs().at(LSTM::Input::INPUT_GATE_BIAS) << ") Forget Gate Bias("
+ << node.getInputs().at(LSTM::Input::FORGET_GATE_BIAS) << ") Cell Bias("
+ << node.getInputs().at(LSTM::Input::CELL_BIAS) << ") Output Gate Bias("
+ << node.getInputs().at(LSTM::Input::OUTPUT_GATE_BIAS) << ") Projection Weights("
+ << node.getInputs().at(LSTM::Input::PROJECTION_WEIGHTS) << ") Projection Bias("
+ << node.getInputs().at(LSTM::Input::PROJECTION_BIAS) << ") Output State In("
+ << node.getInputs().at(LSTM::Input::OUTPUT_STATE_IN) << ") Cell State In("
+ << node.getInputs().at(LSTM::Input::CELL_STATE_IN);
+ if (node.getInputs().size() == 24)
+ {
+ VERBOSE(LIR) << ") Input Layer Normalization Weights("
+ << node.getInputs().at(LSTM::Input::INPUT_LAYER_NORMALIZATION_WEIGHTS)
+ << ") Forget Layer Normalization Weights("
+ << node.getInputs().at(LSTM::Input::FORGET_LAYER_NORMALIZATION_WEIGHTS)
+ << ") Cell Layer Normalization Weights("
+ << node.getInputs().at(LSTM::Input::CELL_LAYER_NORMALIZATION_WEIGHTS)
+                 << ") Output Layer Normalization Weights("
+ << node.getInputs().at(LSTM::Input::OUTPUT_LAYER_NORMALIZATION_WEIGHTS);
+ }
+ VERBOSE(LIR) << ")" << std::endl;
VERBOSE(LIR) << " - Output : Scratch Buffer("
<< node.getOutputs().at(LSTM::Output::SCRATCH_BUFFER) << ") Output State Out("
- << node.getInputs().at(LSTM::Output::OUTPUT_STATE_OUT) << ") Cell State Out("
- << node.getInputs().at(LSTM::Output::CELL_STATE_OUT) << ") Output("
- << node.getInputs().at(LSTM::Output::OUTPUT) << ")" << std::endl;
+ << node.getOutputs().at(LSTM::Output::OUTPUT_STATE_OUT) << ") Cell State Out("
+ << node.getOutputs().at(LSTM::Output::CELL_STATE_OUT) << ") Output("
+ << node.getOutputs().at(LSTM::Output::OUTPUT) << ")" << std::endl;
}
-void OperationDumper::visit(const Pack &node) { dumpPackingOp(node); }
+void OperationDumper::visit(const Pack &node) { dumpOpGeneric(node); }
void OperationDumper::visit(const Pad &node)
{
@@ -249,23 +288,23 @@ void OperationDumper::visit(const Permute &node)
void OperationDumper::visit(const Pool2D &node)
{
std::string padding_type =
- node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
+ node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
VERBOSE(LIR) << "* " << node.name() << "(" << padding_type << ")" << std::endl;
VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(Pool2D::Input::INPUT) << ")"
<< std::endl;
VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const Pow &node) { dumpBinaryInputOp(node); }
+void OperationDumper::visit(const Pow &node) { dumpOpGeneric(node); }
void OperationDumper::visit(const PReLU &node)
{
std::string alpha =
- "Alpha(" + std::to_string(node.getInputs().at(PReLU::Input::ALPHA).value()) + ")";
- dumpUnaryInputOp(node, alpha);
+ "Alpha(" + std::to_string(node.getInputs().at(PReLU::Input::ALPHA).value()) + ")";
+ dumpOpGeneric(node, alpha);
}
-void OperationDumper::visit(const Rank &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const Rank &node) { dumpOpGeneric(node); }
void OperationDumper::visit(const Reduce &node) { dumpUnaryInputOp(node); }
@@ -273,18 +312,20 @@ void OperationDumper::visit(const Reshape &node)
{
// optional param
std::string shape =
- node.getInputs().size() == 2
- ? "Shape(" + std::to_string(node.getInputs().at(Reshape::Input::SHAPE).value()) + ")"
- : "Shape(not provided)";
+ node.getInputs().size() == 2
+ ? "Shape(" + std::to_string(node.getInputs().at(Reshape::Input::SHAPE).value()) + ")"
+ : "Shape(not provided)";
dumpUnaryInputOp(node, shape);
}
-void OperationDumper::visit(const ResizeBilinear &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const ResizeBilinear &node) { dumpOpGeneric(node); }
+
+void OperationDumper::visit(const ResizeNearestNeighbor &node) { dumpOpGeneric(node); }
void OperationDumper::visit(const Reverse &node)
{
std::string axis =
- "Axis(" + std::to_string(node.getInputs().at(Reverse::Input::AXIS).value()) + ")";
+ "Axis(" + std::to_string(node.getInputs().at(Reverse::Input::AXIS).value()) + ")";
dumpUnaryInputOp(node, axis);
}
@@ -320,25 +361,24 @@ void OperationDumper::visit(const Select &node)
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const ir::operation::Shape &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const ir::operation::Shape &node) { dumpOpGeneric(node); }
-void OperationDumper::visit(const Softmax &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const Softmax &node) { dumpOpGeneric(node); }
void OperationDumper::visit(const SpaceToBatchND &node)
{
std::string inputs =
- "BlockSize(" +
- std::to_string(node.getInputs().at(SpaceToBatchND::Input::BLOCK_SIZE).value()) +
- ") Paddings(" + std::to_string(node.getInputs().at(SpaceToBatchND::Input::PADDINGS).value()) +
- ")";
+ "BlockSize(" + std::to_string(node.getInputs().at(SpaceToBatchND::Input::BLOCK_SIZE).value()) +
+ ") Paddings(" + std::to_string(node.getInputs().at(SpaceToBatchND::Input::PADDINGS).value()) +
+ ")";
dumpUnaryInputOp(node, inputs);
}
-void OperationDumper::visit(const SpaceToDepth &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const SpaceToDepth &node) { dumpOpGeneric(node); }
-void OperationDumper::visit(const Split &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const Split &node) { dumpOpGeneric(node); }
-void OperationDumper::visit(const SquaredDifference &node) { dumpBinaryInputOp(node); }
+void OperationDumper::visit(const SquaredDifference &node) { dumpOpGeneric(node); }
void OperationDumper::visit(const StatelessRandomUniform &node)
{
@@ -349,7 +389,7 @@ void OperationDumper::visit(const StatelessRandomUniform &node)
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const Squeeze &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const Squeeze &node) { dumpOpGeneric(node); }
void OperationDumper::visit(const Slice &node) { dumpUnaryInputOp(node); }
@@ -358,7 +398,7 @@ void OperationDumper::visit(const StridedSlice &node) { dumpUnaryInputOp(node);
void OperationDumper::visit(const Tile &node)
{
std::string multiples =
- "Multiples(" + std::to_string(node.getInputs().at(Tile::Input::MULTIPLES).value()) + ")";
+ "Multiples(" + std::to_string(node.getInputs().at(Tile::Input::MULTIPLES).value()) + ")";
dumpUnaryInputOp(node, multiples);
}
@@ -375,7 +415,7 @@ void OperationDumper::visit(const TopKV2 &node)
void OperationDumper::visit(const TransposeConv &node)
{
std::string padding_type =
- node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
+ node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
VERBOSE(LIR) << "* TransposeConv(" << padding_type << ")" << std::endl;
VERBOSE(LIR) << " - Inputs : Output Shape("
<< node.getInputs().at(TransposeConv::Input::OUTPUT_SHAPE) << ") KERNEL("
@@ -384,22 +424,14 @@ void OperationDumper::visit(const TransposeConv &node)
VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
}
-void OperationDumper::visit(const Transpose &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const Transpose &node) { dumpOpGeneric(node); }
void OperationDumper::visit(const Unpack &node)
{
VERBOSE(LIR) << "* " << node.name() << std::endl;
VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Unpack::Input::INPUT) << ")"
<< std::endl;
- std::string outputs;
- const auto &output_indices = node.getOutputs();
- for (auto it = std::begin(output_indices); it != std::end(output_indices); ++it)
- {
- outputs += std::to_string(it->value());
- if (std::next(it) != std::end(output_indices))
- outputs += ", ";
- }
- VERBOSE(LIR) << " - Outputs : Outputs(" << outputs << ")" << std::endl;
+ VERBOSE(LIR) << " - Output : Outputs(" << node.getOutputs() << ")" << std::endl;
}
void OperationDumper::visit(const OneHot &node)
@@ -413,51 +445,21 @@ void OperationDumper::visit(const OneHot &node)
void OperationDumper::visit(const If &node)
{
VERBOSE(LIR) << "* " << node.name() << std::endl;
- std::string inputs;
- const auto &input_indices = node.getInputs();
- for (auto it = std::begin(input_indices); it != std::end(input_indices); ++it)
- {
- inputs += std::to_string(it->value());
- if (std::next(it) != std::end(input_indices))
- inputs += ", ";
- }
VERBOSE(LIR) << " - Inputs : "
<< "Then subgraph (" << node.param().then_subg_index << ") Else subgraph ("
- << node.param().else_subg_index << ") Inputs(" << inputs << ")" << std::endl;
- std::string outputs;
- const auto &output_indices = node.getOutputs();
- for (auto it = std::begin(output_indices); it != std::end(output_indices); ++it)
- {
- outputs += std::to_string(it->value());
- if (std::next(it) != std::end(output_indices))
- outputs += ", ";
- }
- VERBOSE(LIR) << " - Output : Outputs(" << outputs << ")" << std::endl;
+ << node.param().else_subg_index << ") Inputs(" << node.getInputs() << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Output : Outputs(" << node.getOutputs() << ")" << std::endl;
}
void OperationDumper::visit(const While &node)
{
VERBOSE(LIR) << "* " << node.name() << std::endl;
- std::string inputs;
- const auto &input_indices = node.getInputs();
- for (auto it = std::begin(input_indices); it != std::end(input_indices); ++it)
- {
- inputs += std::to_string(it->value());
- if (std::next(it) != std::end(input_indices))
- inputs += ", ";
- }
VERBOSE(LIR) << " - Inputs : "
<< "Cond subgraph (" << node.param().cond_subg_index << ") Body subgraph ("
- << node.param().cond_subg_index << ") Inputs(" << inputs << ")" << std::endl;
- std::string outputs;
- const auto &output_indices = node.getOutputs();
- for (auto it = std::begin(output_indices); it != std::end(output_indices); ++it)
- {
- outputs += std::to_string(it->value());
- if (std::next(it) != std::end(output_indices))
- outputs += ", ";
- }
- VERBOSE(LIR) << " - Output : Outputs(" << outputs << ")" << std::endl;
+ << node.param().body_subg_index << ") Inputs(" << node.getInputs() << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Output : Outputs(" << node.getOutputs() << ")" << std::endl;
}
} // namespace ir
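
The hunks above fold the old dumpUnaryInputOp/dumpBinaryInputOp/dumpPackingOp calls into a single dumpOpGeneric helper whose definition lies outside these hunks. A minimal sketch of what such a helper could look like, assuming VERBOSE(LIR) logging and the operator<< overload for OperandIndexSequence that the Unpack/If/While hunks rely on (this is an illustration, not the patch's actual definition):

// Hypothetical sketch only.
static void dumpOpGeneric(const Operation &node, const std::string &properties = "")
{
  VERBOSE(LIR) << "* " << node.name() << std::endl;
  VERBOSE(LIR) << " - Inputs : Inputs(" << node.getInputs() << ") " << properties << std::endl;
  VERBOSE(LIR) << " - Output : Outputs(" << node.getOutputs() << ")" << std::endl;
}

This matches both call forms seen above: dumpOpGeneric(node) and dumpOpGeneric(node, alpha).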
diff --git a/runtime/onert/core/src/ir/OperationDumper.h b/runtime/onert/core/src/ir/OperationDumper.h
index e8ab3b3cd..99bf869d5 100644
--- a/runtime/onert/core/src/ir/OperationDumper.h
+++ b/runtime/onert/core/src/ir/OperationDumper.h
@@ -31,8 +31,9 @@ public:
OperationDumper(const std::string &start_msg);
public:
- void visit(const operation::ArgMax &) override;
+ void visit(const operation::ArgMinMax &) override;
void visit(const operation::BatchToSpaceND &node) override;
+ void visit(const operation::BCQFullyConnected &node) override;
void visit(const operation::BinaryArithmetic &node) override;
void visit(const operation::BroadcastTo &) override;
void visit(const operation::Comparison &) override;
@@ -47,12 +48,14 @@ public:
void visit(const operation::ElementwiseUnary &) override;
void visit(const operation::EmbeddingLookup &) override;
void visit(const operation::ExpandDims &) override;
+ void visit(const operation::Fill &) override;
void visit(const operation::FullyConnected &node) override;
void visit(const operation::Gather &) override;
void visit(const operation::HashtableLookup &) override;
void visit(const operation::InstanceNorm &) override;
void visit(const operation::L2Normalization &) override;
void visit(const operation::LocalResponseNormalization &) override;
+ void visit(const operation::Loss &node) override;
void visit(const operation::LSTM &) override;
void visit(const operation::Pack &) override;
void visit(const operation::Pad &) override;
@@ -65,6 +68,7 @@ public:
void visit(const operation::Reduce &) override;
void visit(const operation::Reshape &node) override;
void visit(const operation::ResizeBilinear &) override;
+ void visit(const operation::ResizeNearestNeighbor &) override;
void visit(const operation::Reverse &) override;
void visit(const operation::RNN &) override;
void visit(const operation::Select &node) override;
diff --git a/runtime/onert/core/src/ir/OperationValidator.cc b/runtime/onert/core/src/ir/OperationValidator.cc
new file mode 100644
index 000000000..09f773cf0
--- /dev/null
+++ b/runtime/onert/core/src/ir/OperationValidator.cc
@@ -0,0 +1,545 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OperationValidator.h"
+
+#include "ir/Graph.h"
+#include "util/logging.h"
+
+#define OP_REQUIRES(EXP) \
+ do \
+ { \
+ if (!(EXP)) \
+ throw std::runtime_error("OperationValidator failed at line " + std::to_string(__LINE__)); \
+ } while (0)
+
+namespace onert
+{
+namespace ir
+{
+
+OperationValidator::OperationValidator(const Graph &graph)
+ : _operations{graph.operations()}, _operands{graph.operands()}
+{
+}
+
+void OperationValidator::operator()()
+{
+ _operations.iterate([&](const OperationIndex &, const IOperation &node) { node.accept(*this); });
+}
+
+DataType OperationValidator::operandType(const OperandIndex &idx)
+{
+ return _operands.at(idx).typeInfo().type();
+}
+
+bool OperationValidator::isConstant(const OperandIndex &idx)
+{
+ return _operands.at(idx).isConstant();
+}
+
+bool OperationValidator::isSameType(const OperandIndex &idx1, const OperandIndex &idx2)
+{
+ return operandType(idx1) == operandType(idx2);
+}
+
+bool OperationValidator::isSameQuantParam(const OperandIndex &idx1, const OperandIndex &idx2)
+{
+ if (_operands.at(idx1).typeInfo().scale() != _operands.at(idx2).typeInfo().scale())
+ return false;
+
+ if (_operands.at(idx1).typeInfo().zero_point() != _operands.at(idx2).typeInfo().zero_point())
+ return false;
+
+ return true;
+}
+
+bool OperationValidator::isValidType(const OperandIndex &idx, const DataType &type)
+{
+ return operandType(idx) == type;
+}
+
+bool OperationValidator::isValidType(const OperandIndex &idx,
+ std::initializer_list<DataType> valid_types)
+{
+ for (auto &&type_to_check : valid_types)
+ {
+ if (isValidType(idx, type_to_check))
+ {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void OperationValidator::visit(const operation::AddN &node)
+{
+ const auto output_index(node.getOutputs().at(0));
+
+ int size = node.getInputs().size();
+ for (int i = 0; i < size; i++)
+ {
+ const auto input_index(node.getInputs().at(i));
+ OP_REQUIRES(isValidType(input_index, {DataType::FLOAT32, DataType::INT32}));
+ OP_REQUIRES(isSameType(input_index, output_index));
+ }
+}
+
+void OperationValidator::visit(const operation::ArgMinMax &node)
+{
+ const auto input_index(node.getInputs().at(operation::ArgMinMax::Input::INPUT));
+ const auto axis_index(node.getInputs().at(operation::ArgMinMax::Input::AXIS));
+ const auto output_index(node.getOutputs().at(0));
+ const auto output_type = node.param().output_type;
+
+ OP_REQUIRES(isValidType(input_index, {DataType::FLOAT32, DataType::INT32, DataType::UINT8,
+ DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM}));
+ OP_REQUIRES(isValidType(axis_index, {DataType::INT32, DataType::INT64}));
+ OP_REQUIRES(isValidType(output_index, {DataType::INT32, DataType::INT64}));
+ OP_REQUIRES(isValidType(output_index, output_type));
+}
+
+void OperationValidator::visit(const operation::BatchMatMul &node)
+{
+ const auto lhs_index(node.getInputs().at(operation::BatchMatMul::Input::LHS));
+ const auto rhs_index(node.getInputs().at(operation::BatchMatMul::Input::RHS));
+ const auto output_index(node.getOutputs().at(0));
+
+  // Constant lhs and rhs are not implemented yet
+ OP_REQUIRES(!isConstant(lhs_index) && !isConstant(rhs_index));
+
+ // Allow hybrid quantization (lhs: float / rhs: qint8 / out: float)
+ OP_REQUIRES(isValidType(lhs_index, {DataType::FLOAT32, DataType::QUANT_INT8_ASYMM}));
+ OP_REQUIRES(isSameType(lhs_index, rhs_index) ||
+ ((operandType(lhs_index) == DataType::FLOAT32) &&
+ (operandType(rhs_index) == DataType::QUANT_INT8_ASYMM)));
+ OP_REQUIRES(isSameType(lhs_index, output_index));
+}
+
+void OperationValidator::visit(const operation::BatchToSpaceND &node)
+{
+ const auto input_index{node.getInputs().at(operation::BatchToSpaceND::Input::INPUT)};
+ const auto output_index{node.getOutputs().at(0)};
+
+ OP_REQUIRES(isSameType(input_index, output_index));
+}
+
+void OperationValidator::visit(const operation::BinaryArithmetic &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(operation::BinaryArithmetic::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(operation::BinaryArithmetic::Input::RHS)};
+
+ OP_REQUIRES(isSameType(lhs_index, rhs_index));
+ OP_REQUIRES(isSameType(lhs_index, output_index));
+}
+
+void OperationValidator::visit(const operation::Comparison &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+
+ const auto lhs_index{node.getInputs().at(operation::Comparison::Input::INPUT0)};
+ const auto rhs_index{node.getInputs().at(operation::Comparison::Input::INPUT1)};
+
+ OP_REQUIRES(isSameType(lhs_index, rhs_index));
+ OP_REQUIRES(isValidType(output_index, DataType::BOOL8));
+}
+
+void OperationValidator::visit(const operation::Concat &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+
+ for (auto &&input_index : node.getInputs())
+ {
+ OP_REQUIRES(isSameType(input_index, output_index));
+
+ // Int8 quantization requires same scale and zero point
+ if (isValidType(output_index, DataType::QUANT_INT8_ASYMM))
+ {
+ OP_REQUIRES(isSameQuantParam(input_index, output_index));
+ }
+ }
+}
+
+void OperationValidator::visit(const operation::Conv2D &node)
+{
+ const auto input_index{node.getInputs().at(operation::Conv2D::Input::INPUT)};
+ const auto kernel_index{node.getInputs().at(operation::Conv2D::Input::KERNEL)};
+ const auto output_index{node.getOutputs().at(0)};
+
+ uint32_t stride_horizontal = node.param().stride.horizontal;
+ uint32_t stride_vertical = node.param().stride.vertical;
+ uint32_t dilation_width = node.param().dilation.width_factor;
+ uint32_t dilation_height = node.param().dilation.height_factor;
+
+ OP_REQUIRES((stride_horizontal > 0) && (stride_vertical > 0));
+ OP_REQUIRES((dilation_width > 0) && (dilation_height > 0));
+ OP_REQUIRES(isSameType(input_index, output_index));
+
+ if (isConstant(kernel_index) && operandType(kernel_index) == DataType::QUANT_INT8_ASYMM)
+ {
+ for (const auto zeropoint : _operands.at(kernel_index).typeInfo().zero_points())
+ OP_REQUIRES(zeropoint == 0);
+ }
+}
+
+void OperationValidator::visit(const operation::DepthToSpace &node)
+{
+ const auto input_index{node.getInputs().at(operation::DepthToSpace::Input::INPUT)};
+ const auto output_index{node.getOutputs().at(0)};
+
+ int32_t block_size = node.param().block_size;
+
+ OP_REQUIRES(isValidType(input_index, {DataType::FLOAT32, DataType::INT32, DataType::INT64,
+ DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM}));
+ OP_REQUIRES(isSameType(input_index, output_index));
+
+ OP_REQUIRES(block_size > 0);
+}
+
+void OperationValidator::visit(const operation::DetectionPostProcess &node)
+{
+ const auto &param = node.param();
+
+ // FIXME: number of classes should be 1 for now.
+ OP_REQUIRES(param.num_classes == 1);
+}
+
+void OperationValidator::visit(const operation::DepthwiseConv2D &node)
+{
+ const auto input_index{node.getInputs().at(operation::DepthwiseConv2D::Input::INPUT)};
+ const auto kernel_index{node.getInputs().at(operation::DepthwiseConv2D::Input::KERNEL)};
+ const auto output_index{node.getOutputs().at(0)};
+
+ uint32_t stride_horizontal = node.param().stride.horizontal;
+ uint32_t stride_vertical = node.param().stride.vertical;
+ uint32_t dilation_width = node.param().dilation.width_factor;
+ uint32_t dilation_height = node.param().dilation.height_factor;
+
+ OP_REQUIRES((stride_horizontal > 0) && (stride_vertical > 0));
+ OP_REQUIRES((dilation_width > 0) && (dilation_height > 0));
+ OP_REQUIRES(isSameType(input_index, output_index));
+
+ if (isConstant(kernel_index) && operandType(kernel_index) == DataType::QUANT_INT8_ASYMM)
+ {
+ for (const auto zeropoint : _operands.at(kernel_index).typeInfo().zero_points())
+ OP_REQUIRES(zeropoint == 0);
+ }
+}
+
+void OperationValidator::visit(const operation::ElementwiseActivation &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(0)};
+
+ // Check if I/O types match
+ OP_REQUIRES(isSameType(output_index, input_index));
+
+ switch (node.param().op_type)
+ {
+ case operation::ElementwiseActivation::Type::ELU:
+ OP_REQUIRES(isValidType(input_index, DataType::FLOAT32));
+ break;
+ case operation::ElementwiseActivation::Type::LEAKY_RELU:
+ OP_REQUIRES(
+ isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM,
+ DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM}));
+ break;
+ case operation::ElementwiseActivation::Type::LOGISTIC:
+ OP_REQUIRES(
+ isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM,
+ DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM}));
+ break;
+ case operation::ElementwiseActivation::Type::RELU:
+ OP_REQUIRES(isValidType(
+ input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM}));
+ break;
+ case operation::ElementwiseActivation::Type::TANH:
+ OP_REQUIRES(
+ isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM,
+ DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM}));
+ break;
+ }
+}
+
+void OperationValidator::visit(const operation::ElementwiseBinary &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(operation::ElementwiseBinary::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(operation::ElementwiseBinary::Input::RHS)};
+
+ OP_REQUIRES(isSameType(lhs_index, rhs_index));
+ OP_REQUIRES(isSameType(lhs_index, output_index));
+
+ const auto op_type = node.param().op_type;
+ if (op_type == operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND ||
+ op_type == operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR)
+ {
+ OP_REQUIRES(isValidType(lhs_index, DataType::BOOL8));
+ }
+}
+
+void OperationValidator::visit(const operation::ElementwiseUnary &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(operation::ElementwiseUnary::Input::INPUT)};
+
+ // Check if I/O types match
+ if (node.param().op_type == operation::ElementwiseUnary::Type::DEQUANTIZE)
+ {
+    // NNAPI allows QUANT_INT8_SYMM type input
+ OP_REQUIRES(isValidType(input_index, {DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_SYMM,
+ DataType::QUANT_INT8_ASYMM}));
+ OP_REQUIRES(isValidType(output_index, DataType::FLOAT32));
+ }
+ else if (node.param().op_type == operation::ElementwiseUnary::Type::QUANTIZE)
+ {
+ OP_REQUIRES(isValidType(
+ input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM}));
+ OP_REQUIRES(
+ isValidType(output_index, {DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM}));
+ }
+ else if (node.param().op_type == operation::ElementwiseUnary::Type::FLOOR)
+ {
+ OP_REQUIRES(isValidType(input_index, DataType::FLOAT32));
+ OP_REQUIRES(isSameType(output_index, input_index));
+ }
+ else if (node.param().op_type != operation::ElementwiseUnary::Type::CAST)
+ {
+ OP_REQUIRES(isSameType(output_index, input_index));
+ }
+}
+
+void OperationValidator::visit(const operation::EmbeddingLookup &node)
+{
+ const auto lookups_index{node.getInputs().at(operation::EmbeddingLookup::Input::LOOKUPS)};
+ const auto values_index{node.getInputs().at(operation::EmbeddingLookup::Input::VALUES)};
+ const auto output_index{node.getOutputs().at(0)};
+
+ OP_REQUIRES(isValidType(lookups_index, DataType::INT32));
+
+ // TFLite: Allow hybrid type - value table & output
+ // NNAPI: Require same value table and output type
+ OP_REQUIRES(
+ isSameType(values_index, output_index) ||
+ (isValidType(output_index, DataType::FLOAT32) &&
+ (isValidType(values_index, {DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT8_SYMM}))));
+}
+
+void OperationValidator::visit(const operation::ExpandDims &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(operation::ExpandDims::Input::INPUT)};
+ const auto axis_index{node.getInputs().at(operation::ExpandDims::Input::AXIS)};
+
+ OP_REQUIRES(isSameType(output_index, input_index));
+ OP_REQUIRES(isValidType(axis_index, {DataType::INT32, DataType::INT64}));
+}
+
+void OperationValidator::visit(const operation::Fill &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(operation::Fill::Input::SHAPE)};
+ const auto value_index{node.getInputs().at(operation::Fill::Input::VALUE)};
+
+ OP_REQUIRES(isSameType(output_index, value_index));
+ OP_REQUIRES(isValidType(input_index, {DataType::INT32, DataType::INT64}));
+ OP_REQUIRES(isValidType(output_index,
+ {DataType::FLOAT32, DataType::INT32, DataType::INT64, DataType::BOOL8}));
+}
+
+void OperationValidator::visit(const operation::HashtableLookup &node)
+{
+ const auto hits_index{node.getOutputs().at(operation::HashtableLookup::Output::HITS)};
+ const auto lookups_index{node.getInputs().at(operation::HashtableLookup::Input::LOOKUPS)};
+ const auto keys_index{node.getInputs().at(operation::HashtableLookup::Input::KEYS)};
+
+ OP_REQUIRES(isValidType(lookups_index, DataType::INT32));
+ OP_REQUIRES(isValidType(keys_index, DataType::INT32));
+ OP_REQUIRES(isValidType(hits_index, DataType::QUANT_UINT8_ASYMM));
+}
+
+void OperationValidator::visit(const operation::Pack &node)
+{
+ const auto num{node.param().num};
+
+ OP_REQUIRES(num == static_cast<int32_t>(node.getInputs().size()));
+}
+
+void OperationValidator::visit(const operation::Pad &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(operation::Pad::Input::INPUT)};
+ const auto pad_index{node.getInputs().at(operation::Pad::Input::PAD)};
+ bool isQuantType =
+ isValidType(output_index, {DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM});
+ bool isPadV2 = node.getInputs().size() == 3 ? true : false;
+
+ OP_REQUIRES(isValidType(pad_index, DataType::INT32));
+ OP_REQUIRES(isSameType(input_index, output_index));
+
+ if (isQuantType)
+ OP_REQUIRES(isSameQuantParam(input_index, output_index));
+
+ if (isPadV2)
+ {
+ const auto value_index{node.getInputs().at(operation::Pad::Input::VALUE)};
+ const bool cond_same = isSameType(input_index, value_index);
+ const bool cond_same_quant = (!isQuantType || isSameQuantParam(input_index, value_index));
+ const auto input_t = operandType(input_index);
+ const auto value_t = operandType(value_index);
+ // NNAPI accepts this case. scale and zeroPoint are assumed to be the same as in input0.
+ const bool cond_quant8 =
+ ((input_t == DataType::QUANT_UINT8_ASYMM || input_t == DataType::QUANT_INT8_ASYMM) &&
+ value_t == DataType::INT32);
+ OP_REQUIRES((cond_same && cond_same_quant) || cond_quant8);
+ }
+}
+
+void OperationValidator::visit(const operation::Rank &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+
+ OP_REQUIRES(isValidType(output_index, DataType::INT32));
+}
+
+void OperationValidator::visit(const operation::ResizeBilinear &node)
+{
+ auto align_corners = node.param().align_corners;
+ auto half_pixel_centers = node.param().half_pixel_centers;
+
+ OP_REQUIRES(!align_corners || !half_pixel_centers);
+}
+
+void OperationValidator::visit(const operation::Reverse &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(operation::Reverse::Input::INPUT)};
+ const auto axis_index{node.getInputs().at(operation::Reverse::Input::AXIS)};
+
+ OP_REQUIRES(isValidType(axis_index, DataType::INT32));
+ OP_REQUIRES(isSameType(output_index, input_index));
+}
+
+void OperationValidator::visit(const operation::Select &node)
+{
+ const auto condition_index{node.getInputs().at(operation::Select::Input::CONDITION)};
+ const auto input_true_index{node.getInputs().at(operation::Select::Input::INPUT_TRUE)};
+ const auto input_false_index{node.getInputs().at(operation::Select::Input::INPUT_FALSE)};
+
+ OP_REQUIRES(isValidType(condition_index, DataType::BOOL8));
+ OP_REQUIRES(isSameType(input_true_index, input_false_index));
+}
+
+void OperationValidator::visit(const operation::Shape &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+
+ OP_REQUIRES(isValidType(output_index, {DataType::UINT32, DataType::INT32, DataType::INT64}));
+}
+
+void OperationValidator::visit(const operation::Slice &node)
+{
+ const auto begins_index{node.getInputs().at(operation::Slice::BEGINS)};
+ const auto sizes_index{node.getInputs().at(operation::Slice::SIZES)};
+
+ OP_REQUIRES(isValidType(begins_index, {DataType::INT32, DataType::INT64}));
+ OP_REQUIRES(isSameType(begins_index, sizes_index));
+}
+
+void OperationValidator::visit(const operation::Softmax &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(operation::Softmax::INPUT)};
+
+ OP_REQUIRES(isSameType(input_index, output_index));
+ OP_REQUIRES(isValidType(
+ output_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM}));
+}
+
+void OperationValidator::visit(const operation::SpaceToBatchND &node)
+{
+ const auto block_size_index{node.getInputs().at(operation::SpaceToBatchND::Input::BLOCK_SIZE)};
+ const auto paddings_index{node.getInputs().at(operation::SpaceToBatchND::Input::PADDINGS)};
+
+  // Non-constant block_size and padding are not implemented yet
+ OP_REQUIRES(isConstant(block_size_index));
+ OP_REQUIRES(isConstant(paddings_index));
+}
+
+void OperationValidator::visit(const operation::SpaceToDepth &node)
+{
+ const auto block_size = node.param().block_size;
+ OP_REQUIRES(block_size >= 1);
+}
+
+void OperationValidator::visit(const operation::Split &node)
+{
+ const auto num_splits = node.param().num_splits;
+
+ OP_REQUIRES(num_splits > 0 && num_splits <= 0xFFFF);
+ OP_REQUIRES(node.getOutputs().size() == static_cast<uint32_t>(num_splits));
+}
+
+void OperationValidator::visit(const operation::SquaredDifference &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(operation::SquaredDifference::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(operation::SquaredDifference::Input::RHS)};
+
+ OP_REQUIRES(isSameType(output_index, lhs_index));
+ OP_REQUIRES(isSameType(lhs_index, rhs_index));
+}
+
+void OperationValidator::visit(const operation::StatelessRandomUniform &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto shape_index{node.getInputs().at(operation::StatelessRandomUniform::Input::SHAPE)};
+ const auto seed_index{node.getInputs().at(operation::StatelessRandomUniform::Input::SEED)};
+
+ OP_REQUIRES(isValidType(output_index, DataType::FLOAT32));
+ OP_REQUIRES(isValidType(shape_index, DataType::INT32));
+ OP_REQUIRES(isValidType(seed_index, DataType::INT32));
+}
+
+void OperationValidator::visit(const operation::StridedSlice &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(operation::StridedSlice::Input::INPUT)};
+
+ OP_REQUIRES(isSameType(output_index, input_index));
+}
+
+void OperationValidator::visit(const operation::TransposeConv &node)
+{
+ OP_REQUIRES((node.param().padding.type == PaddingType::SAME) ||
+ (node.param().padding.type == PaddingType::VALID));
+}
+
+void OperationValidator::visit(const operation::Unpack &node)
+{
+ const auto num{node.param().num};
+ OP_REQUIRES(num == static_cast<int32_t>(node.getOutputs().size()));
+}
+
+void OperationValidator::visit(const operation::While &node)
+{
+ OP_REQUIRES(node.getInputs().size() == node.getOutputs().size());
+}
+
+} // namespace ir
+} // namespace onert
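
The new OperationValidator is constructed from a Graph and run through operator(), which visits every operation and lets OP_REQUIRES throw std::runtime_error on the first violated check. A minimal usage sketch (the wrapper function name is illustrative, not part of the patch):

// Illustrative helper, assuming ir/Graph.h and the header added below.
void validateOperations(const onert::ir::Graph &graph)
{
  onert::ir::OperationValidator validator{graph};
  validator(); // iterates all operations; throws on the first failed OP_REQUIRES
}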
diff --git a/runtime/onert/core/src/ir/OperationValidator.h b/runtime/onert/core/src/ir/OperationValidator.h
new file mode 100644
index 000000000..b9bcc4ee8
--- /dev/null
+++ b/runtime/onert/core/src/ir/OperationValidator.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_VALIDATOR_H__
+#define __ONERT_IR_OPERATION_VALIDATOR_H__
+
+#include "ir/OperationVisitor.h"
+#include "ir/Operations.h"
+#include "ir/Operands.h"
+
+namespace onert
+{
+namespace ir
+{
+class Graph;
+class Operands;
+} // namespace ir
+} // namespace onert
+
+namespace onert
+{
+namespace ir
+{
+
+class OperationValidator : public OperationVisitor
+{
+public:
+ OperationValidator(void) = delete;
+ OperationValidator(const Graph &graph);
+
+public:
+ void operator()();
+
+public:
+ void visit(const operation::AddN &node) override;
+ void visit(const operation::ArgMinMax &node) override;
+ void visit(const operation::BatchMatMul &node) override;
+ void visit(const operation::BatchToSpaceND &node) override;
+ void visit(const operation::BinaryArithmetic &node) override;
+ void visit(const operation::Comparison &node) override;
+ void visit(const operation::Concat &node) override;
+ void visit(const operation::Conv2D &node) override;
+ void visit(const operation::DepthToSpace &node) override;
+ void visit(const operation::DepthwiseConv2D &node) override;
+ void visit(const operation::DetectionPostProcess &node) override;
+ void visit(const operation::ElementwiseActivation &node) override;
+ void visit(const operation::ElementwiseBinary &node) override;
+ void visit(const operation::ElementwiseUnary &node) override;
+ void visit(const operation::EmbeddingLookup &node) override;
+ void visit(const operation::ExpandDims &node) override;
+ void visit(const operation::Fill &node) override;
+ void visit(const operation::HashtableLookup &node) override;
+ void visit(const operation::Pack &node) override;
+ void visit(const operation::Pad &node) override;
+ void visit(const operation::Rank &node) override;
+ void visit(const operation::ResizeBilinear &node) override;
+ void visit(const operation::Reverse &node) override;
+ void visit(const operation::Select &node) override;
+ void visit(const operation::Shape &node) override;
+ void visit(const operation::Slice &node) override;
+ void visit(const operation::Softmax &node) override;
+ void visit(const operation::SpaceToBatchND &node) override;
+ void visit(const operation::SpaceToDepth &node) override;
+ void visit(const operation::Split &node) override;
+ void visit(const operation::SquaredDifference &node) override;
+ void visit(const operation::StatelessRandomUniform &node) override;
+ void visit(const operation::StridedSlice &node) override;
+ void visit(const operation::TransposeConv &node) override;
+ void visit(const operation::Unpack &node) override;
+ void visit(const operation::While &node) override;
+
+private:
+ DataType operandType(const OperandIndex &idx);
+ bool isConstant(const OperandIndex &idx);
+ bool isSameType(const OperandIndex &idx1, const OperandIndex &idx2);
+ bool isSameQuantParam(const OperandIndex &idx1, const OperandIndex &idx2);
+ bool isValidType(const OperandIndex &idx, const DataType &type);
+ bool isValidType(const OperandIndex &idx, std::initializer_list<DataType> valid_types);
+
+private:
+ const Operations &_operations;
+ const Operands &_operands;
+};
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_VALIDATOR_H__
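
Extending the validator means pairing a visit declaration in this header with an OP_REQUIRES-based implementation in the .cc above. A hypothetical example for an operation this patch does not yet validate:

// Hypothetical extension -- not part of this patch.
// In OperationValidator.h:
//   void visit(const operation::Transpose &node) override;
// In OperationValidator.cc:
void OperationValidator::visit(const operation::Transpose &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(0)};

  OP_REQUIRES(isSameType(input_index, output_index));
}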
diff --git a/runtime/onert/core/src/ir/Operations.cc b/runtime/onert/core/src/ir/Operations.cc
index 64d0bd6f0..1b4691f58 100644
--- a/runtime/onert/core/src/ir/Operations.cc
+++ b/runtime/onert/core/src/ir/Operations.cc
@@ -25,12 +25,9 @@ namespace ir
Operations::Operations(const Operations &obj)
{
- obj.iterate([&](const OperationIndex &index, const Operation &op) {
- OperationCloner cloner;
- op.accept(cloner);
- _objects.emplace(index, cloner.releaseClone());
- });
- _index_count = obj._index_count;
+ obj.iterate(
+ [&](const OperationIndex &index, const IOperation &op) { _objects.emplace(index, clone(op)); });
+ _next_index = obj._next_index;
}
} // namespace ir
diff --git a/runtime/onert/core/src/ir/Operations.test.cc b/runtime/onert/core/src/ir/Operations.test.cc
new file mode 100644
index 000000000..e57872689
--- /dev/null
+++ b/runtime/onert/core/src/ir/Operations.test.cc
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/Operations.h"
+
+#include "MockNode.h"
+
+#include <gtest/gtest.h>
+
+using onert::ir::Operation;
+using onert::ir::OperationIndex;
+using onert::ir::Operations;
+
+TEST(ir_Operations, basic)
+{
+ Operations ops;
+ ops.push(std::unique_ptr<Operation>(new onert_test::ir::SimpleMock({1, 2, 3, 4}, {5, 6, 7})));
+ OperationIndex idx{0u};
+ ASSERT_EQ(ops.at(idx).getInputs().size(), 4);
+ ASSERT_EQ(ops.at(idx).getOutputs().size(), 3);
+}
+
+TEST(ir_Operations, neg_at)
+{
+ Operations ops;
+ ops.push(std::unique_ptr<Operation>(new onert_test::ir::SimpleMock({1, 2, 3, 4}, {5, 6, 7})));
+ OperationIndex idx{99u};
+ EXPECT_THROW(ops.at(idx), std::out_of_range);
+}
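
The Operations copy constructor above now deep-copies each operation through clone(op). A hedged test sketch in the same style as the new tests (it reuses the existing SimpleMock fixture; the copy-based assertion itself is an assumption, not part of the patch):

TEST(ir_Operations, copy_uses_clone)
{
  Operations ops;
  ops.push(std::unique_ptr<Operation>(new onert_test::ir::SimpleMock({1, 2}, {3})));

  Operations copied{ops}; // exercises the clone(op)-based copy constructor shown above
  ASSERT_EQ(copied.at(OperationIndex{0u}).getInputs().size(), 2);
}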
diff --git a/runtime/onert/core/src/ir/Padding.cc b/runtime/onert/core/src/ir/Padding.cc
index d74f80217..b2b004e7a 100644
--- a/runtime/onert/core/src/ir/Padding.cc
+++ b/runtime/onert/core/src/ir/Padding.cc
@@ -66,14 +66,14 @@ inline ExplicitPadding samePaddingUsingIFM(const FeatureShape &ifm_shape, const
const int32_t vertical_expected_output = (ifm_shape.H + stride.vertical - 1) / stride.vertical;
const int32_t horizontal_expected_output =
- (ifm_shape.W + stride.horizontal - 1) / stride.horizontal;
+ (ifm_shape.W + stride.horizontal - 1) / stride.horizontal;
const int32_t vertical_needed_input =
- (vertical_expected_output - 1) * stride.vertical + effective_filter_h_size;
+ (vertical_expected_output - 1) * stride.vertical + effective_filter_h_size;
const int32_t vertical_total_padding = std::max(0, vertical_needed_input - ifm_shape.H);
const int32_t horizontal_needed_input =
- (horizontal_expected_output - 1) * stride.horizontal + effective_filter_w_size;
+ (horizontal_expected_output - 1) * stride.horizontal + effective_filter_w_size;
const int32_t horizontal_total_padding = std::max(0, horizontal_needed_input - ifm_shape.W);
padding.top = vertical_total_padding / 2;
@@ -90,7 +90,7 @@ inline ExplicitPadding samePadding(const FeatureShape &ifm_shape, const FeatureS
{
const int32_t vertical_expected_output = (ifm_shape.H + stride.vertical - 1) / stride.vertical;
const int32_t horizontal_expected_output =
- (ifm_shape.W + stride.horizontal - 1) / stride.horizontal;
+ (ifm_shape.W + stride.horizontal - 1) / stride.horizontal;
assert(vertical_expected_output == ofm_shape.H);
assert(horizontal_expected_output == ofm_shape.W);
@@ -129,7 +129,7 @@ Padding::Padding(PaddingType paddingType) : type{paddingType}, param{0, 0, 0, 0}
}
Padding::Padding(uint32_t left, uint32_t right, uint32_t top, uint32_t bottom)
- : type{PaddingType::EXPLICIT}, param{left, right, top, bottom}
+ : type{PaddingType::EXPLICIT}, param{left, right, top, bottom}
{
// DO NOTHING
}
diff --git a/runtime/onert/core/src/ir/Shape.cc b/runtime/onert/core/src/ir/Shape.cc
index 322df7b4c..e4e4c154b 100644
--- a/runtime/onert/core/src/ir/Shape.cc
+++ b/runtime/onert/core/src/ir/Shape.cc
@@ -26,10 +26,10 @@ namespace onert
namespace ir
{
-int32_t const Shape::UNSPECIFIED_DIM = -1;
+int32_t const Shape::kUnspecifiedDim = -1;
// NNFW_MAX_RANK is 6
-int32_t const Shape::MAX_RANK = 6;
+int32_t const Shape::kMaxRank = 6;
FeatureShape Shape::asFeature(Layout layout) const
{
@@ -80,34 +80,37 @@ uint64_t Shape::num_elements() const
{
   // if any dimension is unspecified (kUnspecifiedDim), the total number of elements cannot be calculated
if (std::any_of(_dimensions.begin(), _dimensions.end(),
- [](const int32_t &v) { return v == UNSPECIFIED_DIM; }))
+ [](const int32_t &v) { return v == kUnspecifiedDim; }))
throw std::runtime_error("num_elements() cannot calculate when any dimension is unspecified");
return std::accumulate(_dimensions.cbegin(), _dimensions.cend(), UINT64_C(1),
std::multiplies<uint64_t>());
}
-Shape permuteShape(const Shape &shape, Layout frontend_layout, Layout backend_layout)
+Shape permuteShape(const Shape &shape, Layout from, Layout to)
{
- assert(shape.rank() <= Shape::MAX_RANK);
- Shape backend_shape{shape};
- if (shape.rank() >= 4 && frontend_layout == Layout::NHWC && backend_layout == Layout::NCHW)
+ assert(shape.rank() <= Shape::kMaxRank);
+ Shape ret{shape};
+ if (from == to)
+ return ret;
+ if (shape.rank() < 4)
+ return ret;
+ // Permutation changing layout beyond 4-D is not supported yet
+ assert(shape.rank() <= 4);
+ if (from == Layout::NHWC && to == Layout::NCHW)
{
- // Permutation changing layout beyond 4-D is not supported yet
- assert(shape.rank() <= 4);
- backend_shape.dim(1) = shape.dim(3);
- backend_shape.dim(2) = shape.dim(1);
- backend_shape.dim(3) = shape.dim(2);
+ ret.dim(1) = shape.dim(3);
+ ret.dim(2) = shape.dim(1);
+ ret.dim(3) = shape.dim(2);
}
- else if (shape.rank() >= 4 && frontend_layout == Layout::NCHW && backend_layout == Layout::NHWC)
+ else if (from == Layout::NCHW && to == Layout::NHWC)
{
- // Permutation changing layout beyond 4-D is not supported yet
- assert(shape.rank() <= 4);
- backend_shape.dim(1) = shape.dim(2);
- backend_shape.dim(2) = shape.dim(3);
- backend_shape.dim(3) = shape.dim(1);
+ ret.dim(1) = shape.dim(2);
+ ret.dim(2) = shape.dim(3);
+ ret.dim(3) = shape.dim(1);
}
- return backend_shape;
+  // For other cases (either `from` or `to` is UNKNOWN), just return the original shape
+ return ret;
}
} // namespace ir
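
The rewritten permuteShape above returns early when the layouts match or the rank is below 4, then swaps the channel and spatial dimensions. A quick sanity sketch of the NHWC-to-NCHW branch (the function name is illustrative; the Shape is built the same way as in the tests below):

#include "ir/Shape.h"
#include <cassert>

void checkPermuteShape()
{
  onert::ir::Shape nhwc(4); // N, H, W, C
  nhwc.dim(0) = 1;
  nhwc.dim(1) = 224;
  nhwc.dim(2) = 224;
  nhwc.dim(3) = 3;

  const auto nchw =
    onert::ir::permuteShape(nhwc, onert::ir::Layout::NHWC, onert::ir::Layout::NCHW);
  // {1, 224, 224, 3} should become {1, 3, 224, 224}
  assert(nchw.dim(0) == 1 && nchw.dim(1) == 3 && nchw.dim(2) == 224 && nchw.dim(3) == 224);
}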
diff --git a/runtime/onert/core/src/ir/Shape.test.cc b/runtime/onert/core/src/ir/Shape.test.cc
new file mode 100644
index 000000000..4788522d3
--- /dev/null
+++ b/runtime/onert/core/src/ir/Shape.test.cc
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/Shape.h"
+
+#include <gtest/gtest.h>
+
+TEST(ShapeTest, basic_test)
+{
+ {
+ onert::ir::Shape shape(3);
+
+ shape.dim(0) = 1;
+ shape.dim(1) = 2;
+ shape.dim(2) = 3;
+
+ ASSERT_EQ(shape.rank(), 3);
+ ASSERT_EQ(shape.num_elements(), 6);
+ ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), false);
+ ASSERT_EQ(shape.hasUnspecifiedDims(), false);
+ }
+ {
+ onert::ir::Shape shape; // scalar or rank is unspecified
+
+ ASSERT_EQ(shape.rank(), 0);
+ ASSERT_EQ(shape.num_elements(), 1);
+ ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), true);
+ ASSERT_EQ(shape.hasUnspecifiedDims(), false);
+ }
+}
+
+TEST(ShapeTest, neg_basic_test)
+{
+ {
+ onert::ir::Shape shape(2);
+
+ shape.dim(0) = 1;
+ shape.dim(1) = onert::ir::Shape::kUnspecifiedDim;
+
+ ASSERT_EQ(shape.rank(), 2);
+ ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), false);
+ ASSERT_EQ(shape.hasUnspecifiedDims(), true);
+ EXPECT_ANY_THROW(shape.num_elements());
+ }
+}
diff --git a/runtime/onert/core/src/ir/TypeInfo.cc b/runtime/onert/core/src/ir/TypeInfo.cc
index ab8af287e..5d1c7ba8b 100644
--- a/runtime/onert/core/src/ir/TypeInfo.cc
+++ b/runtime/onert/core/src/ir/TypeInfo.cc
@@ -28,7 +28,7 @@ bool operator==(const TypeInfo &lhs, const TypeInfo &rhs)
return false;
}
- if (lhs.offset() != rhs.offset())
+ if (lhs.zero_point() != rhs.zero_point())
{
return false;
}
diff --git a/runtime/onert/core/src/ir/operation/AddN.cc b/runtime/onert/core/src/ir/operation/AddN.cc
new file mode 100644
index 000000000..a51e12dff
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/AddN.cc
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/AddN.h"
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void AddN::accept(OperationVisitor &v) const { v.visit(*this); }
+
+AddN::AddN(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(inputs.size()), inputs, outputs}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ArgMax.cc b/runtime/onert/core/src/ir/operation/ArgMinMax.cc
index 1275ae43a..2f18ff2e2 100644
--- a/runtime/onert/core/src/ir/operation/ArgMax.cc
+++ b/runtime/onert/core/src/ir/operation/ArgMinMax.cc
@@ -14,10 +14,7 @@
* limitations under the License.
*/
-#include "ir/operation/ArgMax.h"
-
-#include <cassert>
-
+#include "ir/operation/ArgMinMax.h"
#include "ir/OperationVisitor.h"
namespace onert
@@ -27,11 +24,11 @@ namespace ir
namespace operation
{
-void ArgMax::accept(OperationVisitor &v) const { v.visit(*this); }
+void ArgMinMax::accept(OperationVisitor &v) const { v.visit(*this); }
-ArgMax::ArgMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ArgMinMax::ArgMinMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/BCQFullyConnected.cc b/runtime/onert/core/src/ir/operation/BCQFullyConnected.cc
index 9dc54e6e9..ccda674ad 100644
--- a/runtime/onert/core/src/ir/operation/BCQFullyConnected.cc
+++ b/runtime/onert/core/src/ir/operation/BCQFullyConnected.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/BCQFullyConnected.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void BCQFullyConnected::accept(OperationVisitor &v) const { v.visit(*this); }
BCQFullyConnected::BCQFullyConnected(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param &param)
- : Operation{OperandConstraint::createExact(5u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(5u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/BCQGather.cc b/runtime/onert/core/src/ir/operation/BCQGather.cc
index 80efa6460..1ca5b0c9f 100644
--- a/runtime/onert/core/src/ir/operation/BCQGather.cc
+++ b/runtime/onert/core/src/ir/operation/BCQGather.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/BCQGather.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void BCQGather::accept(OperationVisitor &v) const { v.visit(*this); }
BCQGather::BCQGather(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(4u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(4u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/BatchMatMul.cc b/runtime/onert/core/src/ir/operation/BatchMatMul.cc
index b9616158d..20c5682f9 100644
--- a/runtime/onert/core/src/ir/operation/BatchMatMul.cc
+++ b/runtime/onert/core/src/ir/operation/BatchMatMul.cc
@@ -28,7 +28,7 @@ void BatchMatMul::accept(OperationVisitor &v) const { v.visit(*this); }
BatchMatMul::BatchMatMul(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/BatchToSpaceND.cc b/runtime/onert/core/src/ir/operation/BatchToSpaceND.cc
index 9ef2b125f..3c5578ac4 100644
--- a/runtime/onert/core/src/ir/operation/BatchToSpaceND.cc
+++ b/runtime/onert/core/src/ir/operation/BatchToSpaceND.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/BatchToSpaceND.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void BatchToSpaceND::accept(OperationVisitor &v) const { v.visit(*this); }
BatchToSpaceND::BatchToSpaceND(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}
+ : Operation{OperandConstraint::createInRange(2u, 3u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/BinaryArithmetic.cc b/runtime/onert/core/src/ir/operation/BinaryArithmetic.cc
index 2b1422c73..5eb3fc3d7 100644
--- a/runtime/onert/core/src/ir/operation/BinaryArithmetic.cc
+++ b/runtime/onert/core/src/ir/operation/BinaryArithmetic.cc
@@ -15,12 +15,10 @@
*/
#include "ir/operation/BinaryArithmetic.h"
+#include "ir/OperationVisitor.h"
-#include <cassert>
#include <unordered_map>
-#include "ir/OperationVisitor.h"
-
namespace onert
{
namespace ir
@@ -32,7 +30,7 @@ void BinaryArithmetic::accept(OperationVisitor &v) const { v.visit(*this); }
BinaryArithmetic::BinaryArithmetic(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param &param)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
@@ -40,10 +38,10 @@ std::string BinaryArithmetic::name() const
{
using ArithmeticType = onert::ir::operation::BinaryArithmetic::ArithmeticType;
static const std::unordered_map<ArithmeticType, std::string> name_map{
- {ArithmeticType::ADD, std::string{"Add"}},
- {ArithmeticType::SUB, std::string{"Sub"}},
- {ArithmeticType::MUL, std::string{"Mul"}},
- {ArithmeticType::DIV, std::string{"Div"}}};
+ {ArithmeticType::ADD, std::string{"Add"}},
+ {ArithmeticType::SUB, std::string{"Sub"}},
+ {ArithmeticType::MUL, std::string{"Mul"}},
+ {ArithmeticType::DIV, std::string{"Div"}}};
return name_map.at(_param.arithmetic_type);
}
diff --git a/runtime/onert/core/src/ir/operation/BroadcastTo.cc b/runtime/onert/core/src/ir/operation/BroadcastTo.cc
index a8f5e59cf..eab6c0611 100644
--- a/runtime/onert/core/src/ir/operation/BroadcastTo.cc
+++ b/runtime/onert/core/src/ir/operation/BroadcastTo.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/BroadcastTo.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -29,7 +26,7 @@ namespace operation
void BroadcastTo::accept(OperationVisitor &v) const { v.visit(*this); }
BroadcastTo::BroadcastTo(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Bulk.cc b/runtime/onert/core/src/ir/operation/Bulk.cc
new file mode 100644
index 000000000..4b96c9d94
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Bulk.cc
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Bulk.h"
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+void Bulk::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Bulk::Bulk(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Bulk::Param &param)
+ : Operation{OperandConstraint::createAny(), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Comparison.cc b/runtime/onert/core/src/ir/operation/Comparison.cc
index 2f6775411..33365657c 100644
--- a/runtime/onert/core/src/ir/operation/Comparison.cc
+++ b/runtime/onert/core/src/ir/operation/Comparison.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Comparison.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void Comparison::accept(OperationVisitor &v) const { v.visit(*this); }
Comparison::Comparison(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Concat.cc b/runtime/onert/core/src/ir/operation/Concat.cc
index 608bc29a6..3a21e36f2 100644
--- a/runtime/onert/core/src/ir/operation/Concat.cc
+++ b/runtime/onert/core/src/ir/operation/Concat.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Concat.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void Concat::accept(OperationVisitor &v) const { v.visit(*this); }
Concat::Concat(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createAtLeast(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createAtLeast(1u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Conv2D.cc b/runtime/onert/core/src/ir/operation/Conv2D.cc
index 3a2e1d1fe..d615ae416 100644
--- a/runtime/onert/core/src/ir/operation/Conv2D.cc
+++ b/runtime/onert/core/src/ir/operation/Conv2D.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Conv2D.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void Conv2D::accept(OperationVisitor &v) const { v.visit(*this); }
Conv2D::Conv2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/ConvertFp16ToFp32.cc b/runtime/onert/core/src/ir/operation/ConvertFp16ToFp32.cc
index 676e039fa..365745ea8 100644
--- a/runtime/onert/core/src/ir/operation/ConvertFp16ToFp32.cc
+++ b/runtime/onert/core/src/ir/operation/ConvertFp16ToFp32.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/ConvertFp16ToFp32.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void ConvertFp16ToFp32::accept(OperationVisitor &v) const { v.visit(*this); }
ConvertFp16ToFp32::ConvertFp16ToFp32(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/ConvertFp32ToFp16.cc b/runtime/onert/core/src/ir/operation/ConvertFp32ToFp16.cc
index bcfcbfc04..d4fc7031c 100644
--- a/runtime/onert/core/src/ir/operation/ConvertFp32ToFp16.cc
+++ b/runtime/onert/core/src/ir/operation/ConvertFp32ToFp16.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/ConvertFp32ToFp16.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void ConvertFp32ToFp16::accept(OperationVisitor &v) const { v.visit(*this); }
ConvertFp32ToFp16::ConvertFp32ToFp16(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Custom.cc b/runtime/onert/core/src/ir/operation/Custom.cc
index 25c53e1ba..06c84f81a 100644
--- a/runtime/onert/core/src/ir/operation/Custom.cc
+++ b/runtime/onert/core/src/ir/operation/Custom.cc
@@ -29,7 +29,7 @@ void Custom::accept(OperationVisitor &v) const { v.visit(*this); }
Custom::Custom(OperandConstraint input_constr, const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, std::string id, const Userdata &userdata)
- : Operation{input_constr, inputs, outputs}, _id(std::move(id)), _userdata(userdata)
+ : Operation{input_constr, inputs, outputs}, _id(std::move(id)), _userdata(userdata)
{
}
diff --git a/runtime/onert/core/src/ir/operation/DepthToSpace.cc b/runtime/onert/core/src/ir/operation/DepthToSpace.cc
index f2d6c7c1b..e3edea777 100644
--- a/runtime/onert/core/src/ir/operation/DepthToSpace.cc
+++ b/runtime/onert/core/src/ir/operation/DepthToSpace.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/DepthToSpace.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void DepthToSpace::accept(OperationVisitor &v) const { v.visit(*this); }
DepthToSpace::DepthToSpace(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/DepthwiseConv2D.cc b/runtime/onert/core/src/ir/operation/DepthwiseConv2D.cc
index d587a5591..0e7137306 100644
--- a/runtime/onert/core/src/ir/operation/DepthwiseConv2D.cc
+++ b/runtime/onert/core/src/ir/operation/DepthwiseConv2D.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/DepthwiseConv2D.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void DepthwiseConv2D::accept(OperationVisitor &v) const { v.visit(*this); }
DepthwiseConv2D::DepthwiseConv2D(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param &param)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/DetectionPostProcess.cc b/runtime/onert/core/src/ir/operation/DetectionPostProcess.cc
new file mode 100644
index 000000000..cd708796d
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/DetectionPostProcess.cc
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/DetectionPostProcess.h"
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+DetectionPostProcess::DetectionPostProcess(const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs, const Param &param)
+ : Operation(OperandConstraint::createExact(3u), inputs, outputs), _param(param)
+{
+}
+
+void DetectionPostProcess::accept(OperationVisitor &v) const { v.visit(*this); }
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Einsum.cc b/runtime/onert/core/src/ir/operation/Einsum.cc
index 3c1473aaa..b50f070e7 100644
--- a/runtime/onert/core/src/ir/operation/Einsum.cc
+++ b/runtime/onert/core/src/ir/operation/Einsum.cc
@@ -28,7 +28,7 @@ void Einsum::accept(OperationVisitor &v) const { v.visit(*this); }
Einsum::Einsum(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createAtLeast(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createAtLeast(1u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/ElementwiseActivation.cc b/runtime/onert/core/src/ir/operation/ElementwiseActivation.cc
index f6718b656..e83c26e28 100644
--- a/runtime/onert/core/src/ir/operation/ElementwiseActivation.cc
+++ b/runtime/onert/core/src/ir/operation/ElementwiseActivation.cc
@@ -15,12 +15,10 @@
*/
#include "ir/operation/ElementwiseActivation.h"
+#include "ir/OperationVisitor.h"
-#include <cassert>
#include <unordered_map>
-#include "ir/OperationVisitor.h"
-
namespace onert
{
namespace ir
@@ -33,13 +31,14 @@ void ElementwiseActivation::accept(OperationVisitor &v) const { v.visit(*this);
ElementwiseActivation::ElementwiseActivation(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
if (param.op_type == Type::LOGISTIC)
{
- assert(param.alpha == 0.0f && param.beta == 0.0f && "Logistic will be supported only as "
- "sigmoid function(L=1, k=1, x0=0). So, do "
- "not use alpha and beta");
+ assert(param.alpha == 0.0f && param.beta == 0.0f &&
+ "Logistic will be supported only as "
+ "sigmoid function(L=1, k=1, x0=0). So, do "
+ "not use alpha and beta");
}
else if (param.op_type == Type::RELU)
{
@@ -47,9 +46,10 @@ ElementwiseActivation::ElementwiseActivation(const OperandIndexSequence &inputs,
}
else if (param.op_type == Type::TANH)
{
- assert(param.alpha == 1.0f && param.beta == 1.0f && "f(x) = alpha * tanh(beta * x), Tanh is "
- "supported only the values of alpha and "
- "beta are 1.f");
+ assert(param.alpha == 1.0f && param.beta == 1.0f &&
+ "f(x) = alpha * tanh(beta * x), Tanh is "
+ "supported only the values of alpha and "
+ "beta are 1.f");
}
}
@@ -57,11 +57,11 @@ std::string ElementwiseActivation::name() const
{
using ElementwiseActivationType = onert::ir::operation::ElementwiseActivation::Type;
static const std::unordered_map<Type, std::string> name_map{
- {ElementwiseActivationType::ELU, "ELU"},
- {ElementwiseActivationType::LOGISTIC, "Logistic"},
- {ElementwiseActivationType::RELU, "ReLU"},
- {ElementwiseActivationType::TANH, "Tanh"},
- {ElementwiseActivationType::LEAKY_RELU, "LeakyRelu"}};
+ {ElementwiseActivationType::ELU, "ELU"},
+ {ElementwiseActivationType::LOGISTIC, "Logistic"},
+ {ElementwiseActivationType::RELU, "ReLU"},
+ {ElementwiseActivationType::TANH, "Tanh"},
+ {ElementwiseActivationType::LEAKY_RELU, "LeakyRelu"}};
return name_map.at(_param.op_type);
}
diff --git a/runtime/onert/core/src/ir/operation/ElementwiseBinary.cc b/runtime/onert/core/src/ir/operation/ElementwiseBinary.cc
index 3287fc0a3..b22bed7bc 100644
--- a/runtime/onert/core/src/ir/operation/ElementwiseBinary.cc
+++ b/runtime/onert/core/src/ir/operation/ElementwiseBinary.cc
@@ -15,12 +15,10 @@
*/
#include "ir/operation/ElementwiseBinary.h"
+#include "ir/OperationVisitor.h"
-#include <cassert>
#include <unordered_map>
-#include "ir/OperationVisitor.h"
-
namespace onert
{
namespace ir
@@ -32,7 +30,7 @@ void ElementwiseBinary::accept(OperationVisitor &v) const { v.visit(*this); }
ElementwiseBinary::ElementwiseBinary(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param &param)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
@@ -40,10 +38,11 @@ std::string ElementwiseBinary::name() const
{
using ElementwiseBinaryType = onert::ir::operation::ElementwiseBinary::ElementwiseBinaryType;
static const std::unordered_map<ElementwiseBinaryType, std::string> name_map{
- {ElementwiseBinaryType::LOGICAL_AND, std::string{"LogicalAnd"}},
- {ElementwiseBinaryType::LOGICAL_OR, std::string{"LogicalOr"}},
- {ElementwiseBinaryType::MAX, std::string{"Max"}},
- {ElementwiseBinaryType::MIN, std::string{"Min"}}};
+ {ElementwiseBinaryType::FLOOR_DIV, std::string{"FloorDiv"}},
+ {ElementwiseBinaryType::LOGICAL_AND, std::string{"LogicalAnd"}},
+ {ElementwiseBinaryType::LOGICAL_OR, std::string{"LogicalOr"}},
+ {ElementwiseBinaryType::MAX, std::string{"Max"}},
+ {ElementwiseBinaryType::MIN, std::string{"Min"}}};
return name_map.at(_param.op_type);
}
diff --git a/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc b/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc
index 7dfcd4a98..fd463e0fe 100644
--- a/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc
+++ b/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc
@@ -15,12 +15,10 @@
*/
#include "ir/operation/ElementwiseUnary.h"
+#include "ir/OperationVisitor.h"
-#include <cassert>
#include <unordered_map>
-#include "ir/OperationVisitor.h"
-
namespace onert
{
namespace ir
@@ -32,7 +30,9 @@ void ElementwiseUnary::accept(OperationVisitor &v) const { v.visit(*this); }
ElementwiseUnary::ElementwiseUnary(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs,
+ OperandConstraint::createExact(1u)},
+ _param{param}
{
}
@@ -40,23 +40,23 @@ std::string ElementwiseUnary::name() const
{
using ElementwiseUnaryType = onert::ir::operation::ElementwiseUnary::Type;
static const std::unordered_map<ElementwiseUnaryType, std::string> name_map{
- {ElementwiseUnaryType::ABS, std::string{"Abs"}},
- {ElementwiseUnaryType::CAST, std::string{"Cast"}},
- {ElementwiseUnaryType::COS, std::string{"Cos"}},
- {ElementwiseUnaryType::DEQUANTIZE, std::string{"Dequantize"}},
- {ElementwiseUnaryType::ERF, std::string{"Erf"}},
- {ElementwiseUnaryType::EXP, std::string{"Exp"}},
- {ElementwiseUnaryType::FLOOR, std::string{"Floor"}},
- {ElementwiseUnaryType::LOG, std::string{"Log"}},
- {ElementwiseUnaryType::LOGICAL_NOT, std::string{"LogicalNot"}},
- {ElementwiseUnaryType::NEG, std::string{"Neg"}},
- {ElementwiseUnaryType::QUANTIZE, std::string{"Quantize"}},
- {ElementwiseUnaryType::ROUND, std::string{"Round"}},
- {ElementwiseUnaryType::RSQRT, std::string{"RSqrt"}},
- {ElementwiseUnaryType::SIN, std::string{"Sin"}},
- {ElementwiseUnaryType::SQRT, std::string{"Sqrt"}},
- {ElementwiseUnaryType::SQURE, std::string{"Squre"}},
- {ElementwiseUnaryType::ZEROS_LIKE, std::string{"ZerosLike"}}};
+ {ElementwiseUnaryType::ABS, std::string{"Abs"}},
+ {ElementwiseUnaryType::CAST, std::string{"Cast"}},
+ {ElementwiseUnaryType::COS, std::string{"Cos"}},
+ {ElementwiseUnaryType::DEQUANTIZE, std::string{"Dequantize"}},
+ {ElementwiseUnaryType::ERF, std::string{"Erf"}},
+ {ElementwiseUnaryType::EXP, std::string{"Exp"}},
+ {ElementwiseUnaryType::FLOOR, std::string{"Floor"}},
+ {ElementwiseUnaryType::LOG, std::string{"Log"}},
+ {ElementwiseUnaryType::LOGICAL_NOT, std::string{"LogicalNot"}},
+ {ElementwiseUnaryType::NEG, std::string{"Neg"}},
+ {ElementwiseUnaryType::QUANTIZE, std::string{"Quantize"}},
+ {ElementwiseUnaryType::ROUND, std::string{"Round"}},
+ {ElementwiseUnaryType::RSQRT, std::string{"RSqrt"}},
+ {ElementwiseUnaryType::SIN, std::string{"Sin"}},
+ {ElementwiseUnaryType::SQRT, std::string{"Sqrt"}},
+ {ElementwiseUnaryType::SQUARE, std::string{"Square"}},
+ {ElementwiseUnaryType::ZEROS_LIKE, std::string{"ZerosLike"}}};
return name_map.at(_param.op_type);
}
diff --git a/runtime/onert/core/src/ir/operation/EmbeddingLookup.cc b/runtime/onert/core/src/ir/operation/EmbeddingLookup.cc
index b300b004e..66b80b2c5 100644
--- a/runtime/onert/core/src/ir/operation/EmbeddingLookup.cc
+++ b/runtime/onert/core/src/ir/operation/EmbeddingLookup.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/EmbeddingLookup.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void EmbeddingLookup::accept(OperationVisitor &v) const { v.visit(*this); }
EmbeddingLookup::EmbeddingLookup(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/ExpandDims.cc b/runtime/onert/core/src/ir/operation/ExpandDims.cc
index 3f555bd23..e421bc383 100644
--- a/runtime/onert/core/src/ir/operation/ExpandDims.cc
+++ b/runtime/onert/core/src/ir/operation/ExpandDims.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/ExpandDims.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -30,7 +27,7 @@ namespace operation
void ExpandDims::accept(OperationVisitor &v) const { v.visit(*this); }
ExpandDims::ExpandDims(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Fill.cc b/runtime/onert/core/src/ir/operation/Fill.cc
index c44f45aab..60355c609 100644
--- a/runtime/onert/core/src/ir/operation/Fill.cc
+++ b/runtime/onert/core/src/ir/operation/Fill.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Fill.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -30,7 +27,7 @@ namespace operation
void Fill::accept(OperationVisitor &v) const { v.visit(*this); }
Fill::Fill(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/FullyConnected.cc b/runtime/onert/core/src/ir/operation/FullyConnected.cc
index 118ae554a..3533df097 100644
--- a/runtime/onert/core/src/ir/operation/FullyConnected.cc
+++ b/runtime/onert/core/src/ir/operation/FullyConnected.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/FullyConnected.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void FullyConnected::accept(OperationVisitor &v) const { v.visit(*this); }
FullyConnected::FullyConnected(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param &param)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createInRange(2u, 3u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/FusedBatchNorm.cc b/runtime/onert/core/src/ir/operation/FusedBatchNorm.cc
index 7b9301ea6..b5679f308 100644
--- a/runtime/onert/core/src/ir/operation/FusedBatchNorm.cc
+++ b/runtime/onert/core/src/ir/operation/FusedBatchNorm.cc
@@ -28,7 +28,7 @@ void FusedBatchNorm::accept(OperationVisitor &v) const { v.visit(*this); }
FusedBatchNorm::FusedBatchNorm(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param &param)
- : Operation{OperandConstraint::createAtLeast(5u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createAtLeast(5u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Gather.cc b/runtime/onert/core/src/ir/operation/Gather.cc
index 11d46e75b..e0c4630a0 100644
--- a/runtime/onert/core/src/ir/operation/Gather.cc
+++ b/runtime/onert/core/src/ir/operation/Gather.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Gather.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void Gather::accept(OperationVisitor &v) const { v.visit(*this); }
Gather::Gather(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/HashtableLookup.cc b/runtime/onert/core/src/ir/operation/HashtableLookup.cc
index e9a7a82ff..5d1589cd1 100644
--- a/runtime/onert/core/src/ir/operation/HashtableLookup.cc
+++ b/runtime/onert/core/src/ir/operation/HashtableLookup.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/HashtableLookup.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void HashtableLookup::accept(OperationVisitor &v) const { v.visit(*this); }
HashtableLookup::HashtableLookup(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/If.cc b/runtime/onert/core/src/ir/operation/If.cc
index 599751dfd..380c87dbe 100644
--- a/runtime/onert/core/src/ir/operation/If.cc
+++ b/runtime/onert/core/src/ir/operation/If.cc
@@ -24,7 +24,7 @@ namespace operation
{
void If::accept(OperationVisitor &v) const { v.visit(*this); }
If::If(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param)
- : Operation{OperandConstraint::createAny(), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createAny(), inputs, outputs}, _param{param}
{
}
} // namespace operation
diff --git a/runtime/onert/core/src/ir/operation/InstanceNorm.cc b/runtime/onert/core/src/ir/operation/InstanceNorm.cc
index 2334560ef..9fb55383e 100644
--- a/runtime/onert/core/src/ir/operation/InstanceNorm.cc
+++ b/runtime/onert/core/src/ir/operation/InstanceNorm.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/InstanceNorm.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void InstanceNorm::accept(OperationVisitor &v) const { v.visit(*this); }
InstanceNorm::InstanceNorm(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/L2Normalization.cc b/runtime/onert/core/src/ir/operation/L2Normalization.cc
index 9a7d3eb61..6725df596 100644
--- a/runtime/onert/core/src/ir/operation/L2Normalization.cc
+++ b/runtime/onert/core/src/ir/operation/L2Normalization.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/L2Normalization.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void L2Normalization::accept(OperationVisitor &v) const { v.visit(*this); }
L2Normalization::L2Normalization(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/LSTM.cc b/runtime/onert/core/src/ir/operation/LSTM.cc
index 30a865326..06e66158b 100644
--- a/runtime/onert/core/src/ir/operation/LSTM.cc
+++ b/runtime/onert/core/src/ir/operation/LSTM.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/LSTM.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,8 +28,16 @@ void LSTM::accept(OperationVisitor &v) const { v.visit(*this); }
LSTM::LSTM(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(23u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createInRange(20u, 24u), inputs, outputs}, _param{param}
+{
+}
+
+std::string LSTM::name() const
{
+ if (getOutputs().at(Output::SCRATCH_BUFFER).undefined())
+ return std::string{"UnidirectionalSequenceLSTM"};
+ else
+ return Operation::name();
}
} // namespace operation
diff --git a/runtime/onert/core/src/ir/operation/LocalResponseNormalization.cc b/runtime/onert/core/src/ir/operation/LocalResponseNormalization.cc
index 1ae97c142..73fca9938 100644
--- a/runtime/onert/core/src/ir/operation/LocalResponseNormalization.cc
+++ b/runtime/onert/core/src/ir/operation/LocalResponseNormalization.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/LocalResponseNormalization.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -32,7 +29,7 @@ void LocalResponseNormalization::accept(OperationVisitor &v) const { v.visit(*th
LocalResponseNormalization::LocalResponseNormalization(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/LogSoftmax.cc b/runtime/onert/core/src/ir/operation/LogSoftmax.cc
index 73c6580ec..d580e63e1 100644
--- a/runtime/onert/core/src/ir/operation/LogSoftmax.cc
+++ b/runtime/onert/core/src/ir/operation/LogSoftmax.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/LogSoftmax.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void LogSoftmax::accept(OperationVisitor &v) const { v.visit(*this); }
LogSoftmax::LogSoftmax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Loss.cc b/runtime/onert/core/src/ir/operation/Loss.cc
new file mode 100644
index 000000000..fa3520b2c
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/Loss.cc
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Loss.h"
+#include "ir/OperationVisitor.h"
+
+#include <unordered_map>
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Loss::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Loss::Loss(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param)
+ : Operation{OperandConstraint::createAtLeast(2u), inputs, outputs}, _param{param}
+{
+ if (param.op_type == Type::CATEGORICAL_CROSSENTROPY)
+ {
+ assert(inputs.size() == 2 && "CategoricalCrossentropy Loss has 2 inputs");
+ }
+}
+
+std::string Loss::name() const
+{
+ using LossType = onert::ir::operation::Loss::Type;
+ static const std::unordered_map<Type, std::string> name_map{
+ {LossType::MEAN_SQUARED_ERROR, "MeanSquaredError Loss"},
+ {LossType::CATEGORICAL_CROSSENTROPY, "CategoricalCrossentropy Loss"}};
+ return name_map.at(_param.op_type);
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/MatrixBandPart.cc b/runtime/onert/core/src/ir/operation/MatrixBandPart.cc
index bac31f13e..e52bddc1f 100644
--- a/runtime/onert/core/src/ir/operation/MatrixBandPart.cc
+++ b/runtime/onert/core/src/ir/operation/MatrixBandPart.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/MatrixBandPart.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void MatrixBandPart::accept(OperationVisitor &v) const { v.visit(*this); }
MatrixBandPart::MatrixBandPart(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/OneHot.cc b/runtime/onert/core/src/ir/operation/OneHot.cc
index 22935e7d6..90898f1ed 100644
--- a/runtime/onert/core/src/ir/operation/OneHot.cc
+++ b/runtime/onert/core/src/ir/operation/OneHot.cc
@@ -28,7 +28,7 @@ void OneHot::accept(OperationVisitor &v) const { v.visit(*this); }
OneHot::OneHot(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(4u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(4u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/PReLU.cc b/runtime/onert/core/src/ir/operation/PReLU.cc
index a2e37e0ad..87bd12e60 100644
--- a/runtime/onert/core/src/ir/operation/PReLU.cc
+++ b/runtime/onert/core/src/ir/operation/PReLU.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/PReLU.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -30,7 +27,7 @@ namespace operation
void PReLU::accept(OperationVisitor &v) const { v.visit(*this); }
PReLU::PReLU(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Pack.cc b/runtime/onert/core/src/ir/operation/Pack.cc
index f0908a2c6..00feadfb0 100644
--- a/runtime/onert/core/src/ir/operation/Pack.cc
+++ b/runtime/onert/core/src/ir/operation/Pack.cc
@@ -25,7 +25,7 @@ namespace operation
void Pack::accept(OperationVisitor &v) const { v.visit(*this); }
Pack::Pack(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createAtLeast(3u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createAtLeast(1u), inputs, outputs}, _param{param}
{
}
} // namespace operation
diff --git a/runtime/onert/core/src/ir/operation/Pad.cc b/runtime/onert/core/src/ir/operation/Pad.cc
index 0c56e92e3..a3f2d9752 100644
--- a/runtime/onert/core/src/ir/operation/Pad.cc
+++ b/runtime/onert/core/src/ir/operation/Pad.cc
@@ -30,7 +30,7 @@ void Pad::accept(OperationVisitor &v) const { v.visit(*this); }
// PAD: 2 inputs
// PADV2: 3 inputs
Pad::Pad(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createInRange(2u, 3u), inputs, outputs}
+ : Operation{OperandConstraint::createInRange(2u, 3u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Permute.cc b/runtime/onert/core/src/ir/operation/Permute.cc
index eefb6c542..813fbaf30 100644
--- a/runtime/onert/core/src/ir/operation/Permute.cc
+++ b/runtime/onert/core/src/ir/operation/Permute.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Permute.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -30,7 +27,7 @@ namespace operation
void Permute::accept(OperationVisitor &v) const { v.visit(*this); }
Permute::Permute(const OperandIndex &input, const OperandIndex &output, Type type)
- : Operation{OperandConstraint::createExact(1u)}, _type{type}
+ : Operation{OperandConstraint::createExact(1u)}, _type{type}
{
setInputs({input});
setOutputs({output});
diff --git a/runtime/onert/core/src/ir/operation/Pool2D.cc b/runtime/onert/core/src/ir/operation/Pool2D.cc
index 761d14c3d..e32b876e6 100644
--- a/runtime/onert/core/src/ir/operation/Pool2D.cc
+++ b/runtime/onert/core/src/ir/operation/Pool2D.cc
@@ -15,12 +15,10 @@
*/
#include "ir/operation/Pool2D.h"
+#include "ir/OperationVisitor.h"
-#include <cassert>
#include <unordered_map>
-#include "ir/OperationVisitor.h"
-
namespace onert
{
namespace ir
@@ -32,7 +30,7 @@ void Pool2D::accept(OperationVisitor &v) const { v.visit(*this); }
Pool2D::Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
@@ -40,9 +38,9 @@ std::string Pool2D::name() const
{
using PoolType = onert::ir::operation::Pool2D::PoolType;
static const std::unordered_map<PoolType, std::string> name_map{
- {PoolType::AVG, "Avg" + std::string{toString(opcode())}},
- {PoolType::L2, "L2" + std::string{toString(opcode())}},
- {PoolType::MAX, "Max" + std::string{toString(opcode())}}};
+ {PoolType::AVG, "Avg" + std::string{toString(opcode())}},
+ {PoolType::L2, "L2" + std::string{toString(opcode())}},
+ {PoolType::MAX, "Max" + std::string{toString(opcode())}}};
return name_map.at(_param.op_type);
}
diff --git a/runtime/onert/core/src/ir/operation/Pow.cc b/runtime/onert/core/src/ir/operation/Pow.cc
index 940b1391a..f7c159a12 100644
--- a/runtime/onert/core/src/ir/operation/Pow.cc
+++ b/runtime/onert/core/src/ir/operation/Pow.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Pow.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -30,7 +27,7 @@ namespace operation
void Pow::accept(OperationVisitor &v) const { v.visit(*this); }
Pow::Pow(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/RNN.cc b/runtime/onert/core/src/ir/operation/RNN.cc
index 298c5e745..988a50669 100644
--- a/runtime/onert/core/src/ir/operation/RNN.cc
+++ b/runtime/onert/core/src/ir/operation/RNN.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/RNN.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void RNN::accept(OperationVisitor &v) const { v.visit(*this); }
RNN::RNN(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(5u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(5u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Range.cc b/runtime/onert/core/src/ir/operation/Range.cc
index 96ab04c1b..8ced92a0b 100644
--- a/runtime/onert/core/src/ir/operation/Range.cc
+++ b/runtime/onert/core/src/ir/operation/Range.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Range.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -30,7 +27,7 @@ namespace operation
void Range::accept(OperationVisitor &v) const { v.visit(*this); }
Range::Range(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Rank.cc b/runtime/onert/core/src/ir/operation/Rank.cc
index c357e9018..40797bf29 100644
--- a/runtime/onert/core/src/ir/operation/Rank.cc
+++ b/runtime/onert/core/src/ir/operation/Rank.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Rank.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -30,7 +27,7 @@ namespace operation
void Rank::accept(OperationVisitor &v) const { v.visit(*this); }
Rank::Rank(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Reduce.cc b/runtime/onert/core/src/ir/operation/Reduce.cc
index d6a1d953c..8da1940fa 100644
--- a/runtime/onert/core/src/ir/operation/Reduce.cc
+++ b/runtime/onert/core/src/ir/operation/Reduce.cc
@@ -15,12 +15,10 @@
*/
#include "ir/operation/Reduce.h"
+#include "ir/OperationVisitor.h"
-#include <cassert>
#include <unordered_map>
-#include "ir/OperationVisitor.h"
-
namespace onert
{
namespace ir
@@ -32,7 +30,7 @@ void Reduce::accept(OperationVisitor &v) const { v.visit(*this); }
Reduce::Reduce(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
@@ -40,13 +38,13 @@ std::string Reduce::name() const
{
using ReduceType = onert::ir::operation::Reduce::ReduceType;
static const std::unordered_map<ReduceType, std::string> name_map{
- {ReduceType::ALL, std::string{toString(opcode())} + "All"},
- {ReduceType::ANY, std::string{toString(opcode())} + "Any"},
- {ReduceType::MAX, std::string{toString(opcode())} + "Max"},
- {ReduceType::MEAN, std::string{toString(opcode())} + "Mean"},
- {ReduceType::MIN, std::string{toString(opcode())} + "Min"},
- {ReduceType::PROD, std::string{toString(opcode())} + "Prod"},
- {ReduceType::SUM, std::string{toString(opcode())} + "SUM"}};
+ {ReduceType::ALL, std::string{toString(opcode())} + "All"},
+ {ReduceType::ANY, std::string{toString(opcode())} + "Any"},
+ {ReduceType::MAX, std::string{toString(opcode())} + "Max"},
+ {ReduceType::MEAN, std::string{toString(opcode())} + "Mean"},
+ {ReduceType::MIN, std::string{toString(opcode())} + "Min"},
+ {ReduceType::PROD, std::string{toString(opcode())} + "Prod"},
+ {ReduceType::SUM, std::string{toString(opcode())} + "SUM"}};
return name_map.at(_param.reduce_type);
// return std::string(toString(opcode())) + reduce_type_str_map.at(_param.reduce_type);
}
diff --git a/runtime/onert/core/src/ir/operation/Reshape.cc b/runtime/onert/core/src/ir/operation/Reshape.cc
index 92aa89ac6..0ed4affa1 100644
--- a/runtime/onert/core/src/ir/operation/Reshape.cc
+++ b/runtime/onert/core/src/ir/operation/Reshape.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Reshape.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void Reshape::accept(OperationVisitor &v) const { v.visit(*this); }
Reshape::Reshape(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param(param)
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param(param)
{
}
diff --git a/runtime/onert/core/src/ir/operation/ResizeBilinear.cc b/runtime/onert/core/src/ir/operation/ResizeBilinear.cc
index d0d89f45f..7d256f447 100644
--- a/runtime/onert/core/src/ir/operation/ResizeBilinear.cc
+++ b/runtime/onert/core/src/ir/operation/ResizeBilinear.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/ResizeBilinear.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void ResizeBilinear::accept(OperationVisitor &v) const { v.visit(*this); }
ResizeBilinear::ResizeBilinear(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createInRange(1u, 2u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc b/runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc
index 9f17af97c..58be87b95 100644
--- a/runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc
+++ b/runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/ResizeNearestNeighbor.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -32,7 +29,7 @@ void ResizeNearestNeighbor::accept(OperationVisitor &v) const { v.visit(*this);
ResizeNearestNeighbor::ResizeNearestNeighbor(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createInRange(1u, 2u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Reverse.cc b/runtime/onert/core/src/ir/operation/Reverse.cc
index 4b3c1e1af..6c3746426 100644
--- a/runtime/onert/core/src/ir/operation/Reverse.cc
+++ b/runtime/onert/core/src/ir/operation/Reverse.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Reverse.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -30,7 +27,7 @@ namespace operation
void Reverse::accept(OperationVisitor &v) const { v.visit(*this); }
Reverse::Reverse(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Select.cc b/runtime/onert/core/src/ir/operation/Select.cc
index 1f22b5234..59684190c 100644
--- a/runtime/onert/core/src/ir/operation/Select.cc
+++ b/runtime/onert/core/src/ir/operation/Select.cc
@@ -28,7 +28,7 @@ namespace operation
void Select::accept(OperationVisitor &v) const { v.visit(*this); }
Select::Select(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Shape.cc b/runtime/onert/core/src/ir/operation/Shape.cc
index 2a63d6dcf..f90924488 100644
--- a/runtime/onert/core/src/ir/operation/Shape.cc
+++ b/runtime/onert/core/src/ir/operation/Shape.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Shape.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -30,7 +27,7 @@ namespace operation
void Shape::accept(OperationVisitor &v) const { v.visit(*this); }
Shape::Shape(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Slice.cc b/runtime/onert/core/src/ir/operation/Slice.cc
index 888b563fb..1362c0f91 100644
--- a/runtime/onert/core/src/ir/operation/Slice.cc
+++ b/runtime/onert/core/src/ir/operation/Slice.cc
@@ -27,7 +27,7 @@ namespace operation
void Slice::accept(OperationVisitor &v) const { v.visit(*this); }
Slice::Slice(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Softmax.cc b/runtime/onert/core/src/ir/operation/Softmax.cc
index 3f1aa0af1..c06c85309 100644
--- a/runtime/onert/core/src/ir/operation/Softmax.cc
+++ b/runtime/onert/core/src/ir/operation/Softmax.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Softmax.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void Softmax::accept(OperationVisitor &v) const { v.visit(*this); }
Softmax::Softmax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/SpaceToBatchND.cc b/runtime/onert/core/src/ir/operation/SpaceToBatchND.cc
index 53fab4fa9..94acccb0c 100644
--- a/runtime/onert/core/src/ir/operation/SpaceToBatchND.cc
+++ b/runtime/onert/core/src/ir/operation/SpaceToBatchND.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/SpaceToBatchND.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void SpaceToBatchND::accept(OperationVisitor &v) const { v.visit(*this); }
SpaceToBatchND::SpaceToBatchND(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/SpaceToDepth.cc b/runtime/onert/core/src/ir/operation/SpaceToDepth.cc
index d8a45aee5..08e7e5190 100644
--- a/runtime/onert/core/src/ir/operation/SpaceToDepth.cc
+++ b/runtime/onert/core/src/ir/operation/SpaceToDepth.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/SpaceToDepth.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void SpaceToDepth::accept(OperationVisitor &v) const { v.visit(*this); }
SpaceToDepth::SpaceToDepth(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Split.cc b/runtime/onert/core/src/ir/operation/Split.cc
index 244884e41..3e371188d 100644
--- a/runtime/onert/core/src/ir/operation/Split.cc
+++ b/runtime/onert/core/src/ir/operation/Split.cc
@@ -13,9 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
#include "ir/operation/Split.h"
-#include <cassert>
#include "ir/OperationVisitor.h"
+
namespace onert
{
namespace ir
@@ -25,7 +26,7 @@ namespace operation
void Split::accept(OperationVisitor &v) const { v.visit(*this); }
Split::Split(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
{
}
} // namespace operation
diff --git a/runtime/onert/core/src/ir/operation/SplitV.cc b/runtime/onert/core/src/ir/operation/SplitV.cc
index e638c9ac9..be13f167e 100644
--- a/runtime/onert/core/src/ir/operation/SplitV.cc
+++ b/runtime/onert/core/src/ir/operation/SplitV.cc
@@ -13,9 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
#include "ir/operation/SplitV.h"
-#include <cassert>
#include "ir/OperationVisitor.h"
+
namespace onert
{
namespace ir
@@ -25,7 +26,7 @@ namespace operation
void SplitV::accept(OperationVisitor &v) const { v.visit(*this); }
SplitV::SplitV(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
{
}
} // namespace operation
diff --git a/runtime/onert/core/src/ir/operation/SquaredDifference.cc b/runtime/onert/core/src/ir/operation/SquaredDifference.cc
index 49e58aaf2..db93903c7 100644
--- a/runtime/onert/core/src/ir/operation/SquaredDifference.cc
+++ b/runtime/onert/core/src/ir/operation/SquaredDifference.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/SquaredDifference.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void SquaredDifference::accept(OperationVisitor &v) const { v.visit(*this); }
SquaredDifference::SquaredDifference(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Squeeze.cc b/runtime/onert/core/src/ir/operation/Squeeze.cc
index 8cf928fb4..e059c4bee 100644
--- a/runtime/onert/core/src/ir/operation/Squeeze.cc
+++ b/runtime/onert/core/src/ir/operation/Squeeze.cc
@@ -28,7 +28,7 @@ void Squeeze::accept(OperationVisitor &v) const { v.visit(*this); }
Squeeze::Squeeze(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param(param)
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param(param)
{
}
diff --git a/runtime/onert/core/src/ir/operation/StatelessRandomUniform.cc b/runtime/onert/core/src/ir/operation/StatelessRandomUniform.cc
index cbb0ff251..94be0be86 100644
--- a/runtime/onert/core/src/ir/operation/StatelessRandomUniform.cc
+++ b/runtime/onert/core/src/ir/operation/StatelessRandomUniform.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/StatelessRandomUniform.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -30,7 +27,7 @@ void StatelessRandomUniform::accept(OperationVisitor &v) const { v.visit(*this);
StatelessRandomUniform::StatelessRandomUniform(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/StridedSlice.cc b/runtime/onert/core/src/ir/operation/StridedSlice.cc
index 2a7905995..a38282c93 100644
--- a/runtime/onert/core/src/ir/operation/StridedSlice.cc
+++ b/runtime/onert/core/src/ir/operation/StridedSlice.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/StridedSlice.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void StridedSlice::accept(OperationVisitor &v) const { v.visit(*this); }
StridedSlice::StridedSlice(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(4u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(4u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Tile.cc b/runtime/onert/core/src/ir/operation/Tile.cc
index 5ba3df2ad..51c1ff1dc 100644
--- a/runtime/onert/core/src/ir/operation/Tile.cc
+++ b/runtime/onert/core/src/ir/operation/Tile.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Tile.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -30,7 +27,7 @@ namespace operation
void Tile::accept(OperationVisitor &v) const { v.visit(*this); }
Tile::Tile(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/TopKV2.cc b/runtime/onert/core/src/ir/operation/TopKV2.cc
index a5e6c6a85..e1723d180 100644
--- a/runtime/onert/core/src/ir/operation/TopKV2.cc
+++ b/runtime/onert/core/src/ir/operation/TopKV2.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/TopKV2.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void TopKV2::accept(OperationVisitor &v) const { v.visit(*this); }
TopKV2::TopKV2(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Transpose.cc b/runtime/onert/core/src/ir/operation/Transpose.cc
index 3a663fbce..dbc5ef2aa 100644
--- a/runtime/onert/core/src/ir/operation/Transpose.cc
+++ b/runtime/onert/core/src/ir/operation/Transpose.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/Transpose.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -29,9 +26,8 @@ namespace operation
void Transpose::accept(OperationVisitor &v) const { v.visit(*this); }
-Transpose::Transpose(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
- const Param &param)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+Transpose::Transpose(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
}
diff --git a/runtime/onert/core/src/ir/operation/TransposeConv.cc b/runtime/onert/core/src/ir/operation/TransposeConv.cc
index 7f29ca44e..944cc365d 100644
--- a/runtime/onert/core/src/ir/operation/TransposeConv.cc
+++ b/runtime/onert/core/src/ir/operation/TransposeConv.cc
@@ -15,9 +15,6 @@
*/
#include "ir/operation/TransposeConv.h"
-
-#include <cassert>
-
#include "ir/OperationVisitor.h"
namespace onert
@@ -31,7 +28,7 @@ void TransposeConv::accept(OperationVisitor &v) const { v.visit(*this); }
TransposeConv::TransposeConv(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs, const Param &param)
- : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
{
}
diff --git a/runtime/onert/core/src/ir/operation/Unpack.cc b/runtime/onert/core/src/ir/operation/Unpack.cc
index 67aa54ab5..185eddce3 100644
--- a/runtime/onert/core/src/ir/operation/Unpack.cc
+++ b/runtime/onert/core/src/ir/operation/Unpack.cc
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
#include "ir/operation/Unpack.h"
#include "ir/OperationVisitor.h"
@@ -25,7 +26,7 @@ namespace operation
void Unpack::accept(OperationVisitor &v) const { v.visit(*this); }
Unpack::Unpack(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
{
}
} // namespace operation
diff --git a/runtime/onert/core/src/ir/operation/While.cc b/runtime/onert/core/src/ir/operation/While.cc
index 2505c60e3..f35996b07 100644
--- a/runtime/onert/core/src/ir/operation/While.cc
+++ b/runtime/onert/core/src/ir/operation/While.cc
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
#include "ir/operation/While.h"
#include "ir/OperationVisitor.h"
@@ -25,7 +26,7 @@ namespace operation
void While::accept(OperationVisitor &v) const { v.visit(*this); }
While::While(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
const Param &param)
- : Operation{OperandConstraint::createAny(), inputs, outputs}, _param{param}
+ : Operation{OperandConstraint::createAny(), inputs, outputs}, _param{param}
{
}
} // namespace operation
diff --git a/runtime/onert/core/src/ir/train/TrainableGraph.cc b/runtime/onert/core/src/ir/train/TrainableGraph.cc
new file mode 100644
index 000000000..781f04956
--- /dev/null
+++ b/runtime/onert/core/src/ir/train/TrainableGraph.cc
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/train/TrainableGraph.h"
+#include "util/Utils.h"
+
+#include <algorithm>
+#include <misc/polymorphic_downcast.h>
+
+namespace onert
+{
+namespace ir
+{
+namespace train
+{
+
+TrainableGraph::TrainableGraph() : _graph{} {}
+
+TrainableGraph::TrainableGraph(const TrainableGraph &tgraph)
+ : _graph{tgraph._graph}, _derivatives{tgraph._derivatives}, _losses{tgraph._losses}
+{
+ tgraph.operations().iterate(
+ [&](const onert::ir::OperationIndex &index, const onert::ir::IOperation &op) {
+ replaceOperation(index, dynamic_cast<const ITrainableOperation &>(op).clone());
+ });
+}
+
+TrainableGraph::TrainableGraph(const Graph &graph) : _graph{graph} {}
+
+OperandIndex TrainableGraph::addOperand(const Shape &shape, const TypeInfo &type)
+{
+ return _graph.addOperand(shape, type);
+}
+
+OperandIndex TrainableGraph::addOperand(OperandIndex index, std::unique_ptr<Operand> &&operand)
+{
+ return _graph.addOperand(index, std::move(operand));
+}
+
+OperationIndex TrainableGraph::addOperation(std::unique_ptr<ITrainableOperation> &&operation)
+{
+ return _graph.addOperation(std::move(operation));
+}
+
+OperationIndex TrainableGraph::replaceOperation(OperationIndex index,
+ std::unique_ptr<ITrainableOperation> &&operation)
+{
+ return _graph.replaceOperation(index, std::move(operation));
+}
+
+OperandIndex TrainableGraph::addDerivative(OperandIndex index,
+ std::unique_ptr<Operand> &&derivative)
+{
+ return _derivatives.push(std::move(derivative), index);
+}
+
+IOIndex TrainableGraph::getInputIndex(const std::string &name) const
+{
+ return _graph.getInputIndex(name);
+}
+
+IOIndex TrainableGraph::getOutputIndex(const std::string &name) const
+{
+ return _graph.getOutputIndex(name);
+}
+
+void TrainableGraph::changeShape(const OperandIndex &index, const ir::Shape &new_shape)
+{
+ _graph.changeShape(index, new_shape);
+}
+
+void TrainableGraph::changeDerivativeShape(const OperandIndex &index, const ir::Shape &new_shape)
+{
+ assert(_derivatives.exist(index));
+ _derivatives.at(index).info().shape(new_shape);
+}
+
+void TrainableGraph::addInput(const OperandIndex &ind, const std::string &name)
+{
+ _graph.addInput(ind, name);
+}
+
+void TrainableGraph::addOutput(const OperandIndex &ind, const std::string &name)
+{
+ _graph.addOutput(ind, name);
+}
+
+void TrainableGraph::verify(void) const
+{
+ _graph.verify();
+
+ operations().iterate([](const onert::ir::OperationIndex &, const onert::ir::IOperation &op) {
+ try
+ {
+ UNUSED_RELEASE(dynamic_cast<const onert::ir::train::ITrainableOperation &>(op));
+ }
+ catch (const std::bad_cast &)
+ {
+      throw std::runtime_error("TrainableGraph: " + op.name() + " is not a trainable operation");
+ }
+ });
+}
+
+void TrainableGraph::removeOperand(const OperandIndex &ind) { _graph.removeOperand(ind); }
+
+void TrainableGraph::setLayout(Layout layout) { _graph.setLayout(layout); }
+
+const ITrainableOperation &TrainableGraph::operation(OperationIndex index) const
+{
+  // NOTE Virtually inherited objects cannot be downcast with static_cast, so dynamic_cast is required here.
+ return dynamic_cast<const ITrainableOperation &>(_graph.operations().at(index));
+}
+
+std::vector<ir::OperationIndex> TrainableGraph::topolSortOperations() const
+{
+ return _graph.topolSortOperations();
+}
+
+void TrainableGraph::addLoss(const OperandIndex &loss_ind, const IOIndex &pred_ioind)
+{
+ _losses.emplace(pred_ioind, loss_ind);
+}
+
+OperandIndex TrainableGraph::getLossIndex(const IOIndex &pred_ioind) const
+{
+ auto itr = _losses.find(pred_ioind);
+ return (itr == _losses.end()) ? OperandIndex{} : itr->second;
+}
+
+} // namespace train
+} // namespace ir
+} // namespace onert
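A minimal usage sketch of the TrainableGraph API introduced above, limited to members defined in this file; the shapes, data types, I/O names, and indices are illustrative assumptions, and registering a real trainable operation is omitted since the per-operation wrappers follow in ir/train/operation/*.cc below.

#include "ir/train/TrainableGraph.h"

#include <cassert>

using namespace onert::ir;

void trainableGraphSketch()
{
  train::TrainableGraph tgraph;

  // Illustrative 1-D float operands for one model input and one model output
  auto input = tgraph.addOperand(Shape{3}, TypeInfo{DataType::FLOAT32});
  auto output = tgraph.addOperand(Shape{3}, TypeInfo{DataType::FLOAT32});
  tgraph.addInput(input, "x");
  tgraph.addOutput(output, "y");

  // Loss bookkeeping for the first model output; a real graph would produce
  // this operand through a train::operation::Loss node.
  auto loss = tgraph.addOperand(Shape{1}, TypeInfo{DataType::FLOAT32});
  tgraph.addLoss(loss, IOIndex{0});
  assert(tgraph.getLossIndex(IOIndex{0}) == loss);

  tgraph.verify(); // throws if any registered operation is not an ITrainableOperation
}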
diff --git a/runtime/onert/core/src/ir/train/operation/Conv2D.cc b/runtime/onert/core/src/ir/train/operation/Conv2D.cc
new file mode 100644
index 000000000..923861ae3
--- /dev/null
+++ b/runtime/onert/core/src/ir/train/operation/Conv2D.cc
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/train/operation/Conv2D.h"
+
+#include "ir/OperationVisitor.h"
+#include "ir/train/TrainableOperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace train
+{
+namespace operation
+{
+
+std::unique_ptr<ITrainableOperation> Conv2D::clone() const
+{
+ return std::make_unique<Conv2D>(*this);
+}
+
+void Conv2D::accept(OperationVisitor &v) const { v.visit(*this); }
+
+void Conv2D::accept(TrainableOperationVisitor &v) const { v.visit(*this); }
+
+Conv2D::Conv2D(const OperationType &operation)
+ : OperationType{operation.getInputs(), operation.getOutputs(), operation.param()}
+{
+ // DO NOTHING
+}
+
+} // namespace operation
+} // namespace train
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/train/operation/ElementwiseActivation.cc b/runtime/onert/core/src/ir/train/operation/ElementwiseActivation.cc
new file mode 100644
index 000000000..1dae3f674
--- /dev/null
+++ b/runtime/onert/core/src/ir/train/operation/ElementwiseActivation.cc
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/train/operation/ElementwiseActivation.h"
+
+#include "ir/OperationVisitor.h"
+#include "ir/train/TrainableOperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace train
+{
+namespace operation
+{
+
+std::unique_ptr<ITrainableOperation> ElementwiseActivation::clone() const
+{
+ return std::make_unique<ElementwiseActivation>(*this);
+}
+
+void ElementwiseActivation::accept(OperationVisitor &v) const { v.visit(*this); }
+
+void ElementwiseActivation::accept(TrainableOperationVisitor &v) const { v.visit(*this); }
+
+ElementwiseActivation::ElementwiseActivation(const OperationType &operation)
+ : OperationType{operation.getInputs(), operation.getOutputs(), operation.param()}
+{
+ // DO NOTHING
+}
+
+} // namespace operation
+} // namespace train
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/train/operation/FullyConnected.cc b/runtime/onert/core/src/ir/train/operation/FullyConnected.cc
new file mode 100644
index 000000000..a26f7c489
--- /dev/null
+++ b/runtime/onert/core/src/ir/train/operation/FullyConnected.cc
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/train/operation/FullyConnected.h"
+
+#include "ir/OperationVisitor.h"
+#include "ir/train/TrainableOperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace train
+{
+namespace operation
+{
+
+std::unique_ptr<ITrainableOperation> FullyConnected::clone() const
+{
+ return std::make_unique<FullyConnected>(*this);
+}
+
+void FullyConnected::accept(OperationVisitor &v) const { v.visit(*this); }
+
+void FullyConnected::accept(TrainableOperationVisitor &v) const { v.visit(*this); }
+
+FullyConnected::FullyConnected(const OperationType &operation)
+ : OperationType{operation.getInputs(), operation.getOutputs(), operation.param()}
+{
+ // DO NOTHING
+}
+
+} // namespace operation
+} // namespace train
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/train/operation/Loss.cc b/runtime/onert/core/src/ir/train/operation/Loss.cc
new file mode 100644
index 000000000..abd79929b
--- /dev/null
+++ b/runtime/onert/core/src/ir/train/operation/Loss.cc
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/train/operation/Loss.h"
+
+#include "ir/OperationVisitor.h"
+#include "ir/train/TrainableOperationVisitor.h"
+
+#include <misc/polymorphic_downcast.h>
+
+namespace onert
+{
+namespace ir
+{
+namespace train
+{
+namespace operation
+{
+
+std::unique_ptr<ITrainableOperation> Loss::clone() const { return std::make_unique<Loss>(*this); }
+
+void Loss::accept(OperationVisitor &v) const { v.visit(*this); }
+
+void Loss::accept(TrainableOperationVisitor &v) const { v.visit(*this); }
+
+Loss::Loss(const OperationType &operation)
+ : OperationType{operation.getInputs(), operation.getOutputs(), operation.param()}
+{
+ // DO NOTHING
+}
+
+} // namespace operation
+} // namespace train
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/train/operation/Permute.cc b/runtime/onert/core/src/ir/train/operation/Permute.cc
new file mode 100644
index 000000000..adc23aa49
--- /dev/null
+++ b/runtime/onert/core/src/ir/train/operation/Permute.cc
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/train/operation/Permute.h"
+
+#include "ir/OperationVisitor.h"
+#include "ir/train/TrainableOperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace train
+{
+namespace operation
+{
+
+std::unique_ptr<ITrainableOperation> Permute::clone() const
+{
+ return std::make_unique<Permute>(*this);
+}
+
+void Permute::accept(OperationVisitor &v) const { v.visit(*this); }
+
+void Permute::accept(TrainableOperationVisitor &v) const { v.visit(*this); }
+
+Permute::Permute(const OperationType &operation)
+ : OperationType{operation.getInputs().at(0), operation.getOutputs().at(0),
+ operation.getPermuteType()}
+{
+ // DO NOTHING
+}
+
+} // namespace operation
+} // namespace train
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/train/operation/Pool2D.cc b/runtime/onert/core/src/ir/train/operation/Pool2D.cc
new file mode 100644
index 000000000..021574f19
--- /dev/null
+++ b/runtime/onert/core/src/ir/train/operation/Pool2D.cc
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/train/operation/Pool2D.h"
+
+#include "ir/OperationVisitor.h"
+#include "ir/train/TrainableOperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace train
+{
+namespace operation
+{
+
+std::unique_ptr<ITrainableOperation> Pool2D::clone() const
+{
+ return std::make_unique<Pool2D>(*this);
+}
+
+void Pool2D::accept(OperationVisitor &v) const { v.visit(*this); }
+
+void Pool2D::accept(TrainableOperationVisitor &v) const { v.visit(*this); }
+
+Pool2D::Pool2D(const OperationType &operation)
+ : OperationType{operation.getInputs(), operation.getOutputs(), operation.param()}
+{
+ // DO NOTHING
+}
+
+} // namespace operation
+} // namespace train
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/train/operation/Reshape.cc b/runtime/onert/core/src/ir/train/operation/Reshape.cc
new file mode 100644
index 000000000..c76158607
--- /dev/null
+++ b/runtime/onert/core/src/ir/train/operation/Reshape.cc
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/train/operation/Reshape.h"
+
+#include "ir/OperationVisitor.h"
+#include "ir/train/TrainableOperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace train
+{
+namespace operation
+{
+
+std::unique_ptr<ITrainableOperation> Reshape::clone() const
+{
+ return std::make_unique<Reshape>(*this);
+}
+
+void Reshape::accept(OperationVisitor &v) const { v.visit(*this); }
+
+void Reshape::accept(TrainableOperationVisitor &v) const { v.visit(*this); }
+
+Reshape::Reshape(const OperationType &operation)
+ : OperationType{operation.getInputs(), operation.getOutputs(), operation.param()}
+{
+ // DO NOTHING
+}
+
+} // namespace operation
+} // namespace train
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/train/operation/Softmax.cc b/runtime/onert/core/src/ir/train/operation/Softmax.cc
new file mode 100644
index 000000000..dbd403879
--- /dev/null
+++ b/runtime/onert/core/src/ir/train/operation/Softmax.cc
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/train/operation/Softmax.h"
+
+#include "ir/OperationVisitor.h"
+#include "ir/train/TrainableOperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace train
+{
+namespace operation
+{
+
+std::unique_ptr<ITrainableOperation> Softmax::clone() const
+{
+ return std::make_unique<Softmax>(*this);
+}
+
+void Softmax::accept(OperationVisitor &v) const { v.visit(*this); }
+
+void Softmax::accept(TrainableOperationVisitor &v) const { v.visit(*this); }
+
+Softmax::Softmax(const OperationType &operation)
+ : OperationType{operation.getInputs(), operation.getOutputs(), operation.param()}
+{
+ // DO NOTHING
+}
+
+} // namespace operation
+} // namespace train
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/verifier/Verifier.cc b/runtime/onert/core/src/ir/verifier/Verifier.cc
index 09cbdcf2f..6260d29ff 100644
--- a/runtime/onert/core/src/ir/verifier/Verifier.cc
+++ b/runtime/onert/core/src/ir/verifier/Verifier.cc
@@ -39,11 +39,11 @@ bool DAGChecker::verify(const Graph &graph) const noexcept
OperationIndexMap<bool> visited;
operations.iterate(
- [&](const OperationIndex &index, const Operation &) { visited[index] = false; });
+ [&](const OperationIndex &index, const IOperation &) { visited[index] = false; });
OperationIndexMap<bool> on_stack = visited; // Copy from visited
- std::function<void(const OperationIndex &index, const Operation &)> dfs_recursive =
- [&](const OperationIndex &index, const Operation &node) -> void {
+ std::function<void(const OperationIndex &index, const IOperation &)> dfs_recursive =
+ [&](const OperationIndex &index, const IOperation &node) -> void {
if (on_stack[index])
cyclic = true;
if (visited[index])
@@ -51,7 +51,7 @@ bool DAGChecker::verify(const Graph &graph) const noexcept
visited[index] = true;
on_stack[index] = true;
- for (auto output : node.getOutputs() | Remove::DUPLICATED)
+ for (auto &&output : node.getOutputs() | Remove::DUPLICATED | Remove::UNDEFINED)
{
const auto &operand = graph.operands().at(output);
for (const auto &use : operand.getUses())
@@ -72,12 +72,12 @@ bool DAGChecker::verify(const Graph &graph) const noexcept
// EdgeConsistencyVerifier
//
-bool EdgeConsistencyChecker::verify(const Graph &graph) const noexcept
+bool EdgeChecker::verify(const Graph &graph) const noexcept
{
auto &operations = graph.operations();
uint32_t errors = 0;
- operations.iterate([&](const OperationIndex &index, const Operation &node) {
- for (auto operand_index : node.getInputs() | ir::Remove::UNDEFINED)
+ operations.iterate([&](const OperationIndex &index, const IOperation &node) {
+ for (auto &&operand_index : node.getInputs() | ir::Remove::UNDEFINED)
{
try
{
@@ -85,48 +85,60 @@ bool EdgeConsistencyChecker::verify(const Graph &graph) const noexcept
bool operand_has_use = operand.getUses().contains(index);
if (!operand_has_use)
{
- VERBOSE(EdgeConsistencyChecker) << "[ERROR] EDGE MISMATCH : Missing USE edge - Operand "
- << operand_index << " to Operation " << index
- << std::endl;
+ VERBOSE(EdgeChecker) << "[ERROR] EDGE MISMATCH : Missing USE edge - Operand "
+ << operand_index << " to Operation " << index << std::endl;
errors += 1;
}
}
catch (const std::out_of_range &e)
{
- VERBOSE(EdgeConsistencyChecker)
- << "[ERROR] OPEARAND NOT FOUND : Operation " << index << " has Operand "
- << operand_index << ", but the operand object is not present in the graph" << std::endl;
+ VERBOSE(EdgeChecker) << "[ERROR] OPEARAND NOT FOUND : Operation " << index
+ << " has Operand " << operand_index
+ << ", but the operand object is not present in the graph" << std::endl;
errors += 1;
}
}
- for (auto operand_index : node.getOutputs())
+ for (auto &&operand_index : node.getOutputs() | ir::Remove::UNDEFINED)
{
try
{
auto &operand = graph.operands().at(operand_index);
if (operand.getDef() != index)
{
- VERBOSE(EdgeConsistencyChecker) << "[ERROR] EDGE MISMATCH : Missing DEF edge - Operand"
- << operand_index << " to Operation " << index
- << std::endl;
+ VERBOSE(EdgeChecker) << "[ERROR] EDGE MISMATCH : Missing DEF edge - Operand"
+ << operand_index << " to Operation " << index << std::endl;
errors += 1;
}
}
catch (const std::out_of_range &e)
{
- VERBOSE(EdgeConsistencyChecker)
- << "[ERROR] OPEARAND NOT FOUND : Operation " << index << " has Operand "
- << operand_index << ", but the operand object is not present in the graph" << std::endl;
+ VERBOSE(EdgeChecker) << "[ERROR] OPEARAND NOT FOUND : Operation " << index
+ << " has Operand " << operand_index
+ << ", but the operand object is not present in the graph" << std::endl;
errors += 1;
}
}
});
- VERBOSE(EdgeConsistencyChecker) << "Total Number of errors : " << errors << std::endl;
+ VERBOSE(EdgeChecker) << "Total Number of errors : " << errors << std::endl;
return errors == 0;
}
+bool InputOutputChecker::verify(const Graph &graph) const noexcept
+{
+ for (auto &&operand_ind :
+ (graph.getInputs() + graph.getOutputs()) | Remove::DUPLICATED | Remove::UNDEFINED)
+ {
+ if (!graph.operands().exist(operand_ind))
+ {
+      VERBOSE(InputOutputChecker) << "Input or Output tensor " << operand_ind << " does not exist." << std::endl;
+ return false;
+ }
+ }
+ return true;
+}
+
} // namespace verifier
} // namespace ir
} // namespace onert
diff --git a/runtime/onert/core/src/ir/verifier/Verifier.h b/runtime/onert/core/src/ir/verifier/Verifier.h
index 0c7b57b04..fa1311983 100644
--- a/runtime/onert/core/src/ir/verifier/Verifier.h
+++ b/runtime/onert/core/src/ir/verifier/Verifier.h
@@ -55,7 +55,16 @@ public:
bool verify(const Graph &graph) const noexcept override;
};
-class EdgeConsistencyChecker : public IVerifier
+class EdgeChecker : public IVerifier
+{
+public:
+ bool verify(const Graph &graph) const noexcept override;
+};
+
+/**
+ * @brief Check model input and output operands are really exist in the graph
+ */
+class InputOutputChecker : public IVerifier
{
public:
bool verify(const Graph &graph) const noexcept override;
diff --git a/runtime/onert/core/src/ir/verifier/Verifier.test.cc b/runtime/onert/core/src/ir/verifier/Verifier.test.cc
new file mode 100644
index 000000000..1ec71cd55
--- /dev/null
+++ b/runtime/onert/core/src/ir/verifier/Verifier.test.cc
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Verifier.h"
+
+#include "../MockNode.h"
+
+#include "ir/Graph.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+
+using IndexSet = onert::ir::OperandIndexSequence;
+using Mock = onert_test::ir::SimpleMock;
+
+TEST(Verifier, dag_checker)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ auto operand1 = graph.addOperand(shape, type);
+ auto operand2 = graph.addOperand(shape, type);
+
+ graph.addInput(operand1);
+ graph.addOutput(operand2);
+
+ graph.addOperation(std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2}));
+
+ onert::ir::verifier::DAGChecker verifier;
+
+ ASSERT_TRUE(verifier.verify(graph));
+}
+
+TEST(Verifier, neg_edge_consistency_checker_1)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ auto operand1 = graph.addOperand(shape, type);
+ auto operand2 = graph.addOperand(shape, type);
+
+ graph.addInput(operand1);
+ graph.addOutput(operand2);
+
+ auto mock_op = std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2});
+ auto op_ind = graph.addOperation(std::move(mock_op));
+
+ graph.operands().at(operand1).removeUse(op_ind); // Manipulate the operand alone
+
+ onert::ir::verifier::EdgeChecker verifier;
+ ASSERT_FALSE(verifier.verify(graph));
+}
+
+TEST(Verifier, neg_edge_consistency_checker_2)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ auto operand1 = graph.addOperand(shape, type);
+ auto operand2 = graph.addOperand(shape, type);
+
+ graph.addInput(operand1);
+ graph.addOutput(operand2);
+
+ auto mock_op = std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2});
+ auto mock_op_ptr = mock_op.get();
+ auto op_ind = graph.addOperation(std::move(mock_op));
+
+ mock_op_ptr->setInputs({operand2}); // Manipulate the operation alone
+
+ onert::ir::verifier::EdgeChecker verifier;
+ ASSERT_FALSE(verifier.verify(graph));
+}
diff --git a/runtime/onert/core/src/odc/QuantizeManager.cc b/runtime/onert/core/src/odc/QuantizeManager.cc
new file mode 100644
index 000000000..71572a7e0
--- /dev/null
+++ b/runtime/onert/core/src/odc/QuantizeManager.cc
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "QuantizerLoader.h"
+#include "odc/QuantizeManager.h"
+
+#include <iostream>
+#include <mutex>
+
+namespace onert
+{
+namespace odc
+{
+
+bool QuantizeManager::quantize()
+{
+  // The quantize function is not thread-safe, so serialize calls with a static mutex
+ static std::mutex lock;
+ std::lock_guard<std::mutex> guard(lock);
+
+ if (_export_model_path.empty())
+ throw std::runtime_error("Export model path is not set");
+
+ auto &quantize_loader = QuantizerLoader::instance();
+ if (quantize_loader.loadLibrary() != 0)
+ return false;
+
+ auto quantizer = quantize_loader.get();
+ auto result = quantizer->quantize(_model_path.c_str(), _export_model_path.c_str(), _is_q16);
+
+ // TODO Unload quantize library to reduce memory usage
+
+ return (result == 0);
+}
+
+} // namespace odc
+} // namespace onert
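For reference, a hedged sketch of the caller-side flow that the tests just below exercise; the model paths are placeholders, and quantize() returns false when the quantizer library cannot be loaded.

#include "odc/QuantizeManager.h"

bool quantizeModelSketch()
{
  onert::odc::QuantizeManager manager("model.circle"); // input model path (placeholder)
  manager.exportModelPath("model.q8.circle");          // output path for the quantized model (placeholder)
  return manager.quantize();                           // false if libonert_odc could not be loaded
}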
diff --git a/runtime/onert/core/src/util/GeneralConfigSource.cc b/runtime/onert/core/src/odc/QuantizeManager.test.cc
index 7d2757e58..4e155a6ef 100644
--- a/runtime/onert/core/src/util/GeneralConfigSource.cc
+++ b/runtime/onert/core/src/odc/QuantizeManager.test.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,32 +14,23 @@
* limitations under the License.
*/
-#include "util/GeneralConfigSource.h"
-#include "util/logging.h"
+#include "odc/QuantizeManager.h"
-namespace onert
-{
-namespace util
-{
+#include <gtest/gtest.h>
-std::string GeneralConfigSource::get(const std::string &key) const
+using namespace onert::odc;
+
+// Test export model path is not set
+TEST(odc_QuantizeManager, neg_export_model_path)
{
- auto itr = _map.find(key);
- if (itr == _map.end())
- {
- return "";
- }
- else
- {
- return itr->second;
- }
+ QuantizeManager manager("model_path");
+ ASSERT_THROW(manager.quantize(), std::runtime_error);
}
-void GeneralConfigSource::set(const std::string &key, const std::string &val)
+// Test invalid model path
+TEST(odc_QuantizeManager, neg_invalid_model_path)
{
- VERBOSE(GeneralConfigSource) << key << " : " << val << std::endl;
- _map[key] = val;
+ QuantizeManager manager("invalid_model_path.circle");
+ manager.exportModelPath("export_model_path.circle");
+ ASSERT_EQ(manager.quantize(), false);
}
-
-} // namespace util
-} // namespace onert
diff --git a/runtime/onert/core/src/odc/QuantizerLoader.cc b/runtime/onert/core/src/odc/QuantizerLoader.cc
new file mode 100644
index 000000000..8a972e97e
--- /dev/null
+++ b/runtime/onert/core/src/odc/QuantizerLoader.cc
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "QuantizerLoader.h"
+
+#include <dlfcn.h>
+#include <iostream>
+#include <string>
+
+static const char *SHARED_LIB_EXT =
+#if defined(__APPLE__) && defined(__MACH__)
+ ".dylib";
+#else
+ ".so";
+#endif
+
+namespace onert
+{
+namespace odc
+{
+
+QuantizerLoader &QuantizerLoader::instance()
+{
+ static QuantizerLoader singleton;
+ return singleton;
+}
+
+int32_t QuantizerLoader::loadLibrary()
+{
+ if (get() != nullptr)
+ return 0;
+
+ const std::string quantize_so = std::string("libonert_odc") + SHARED_LIB_EXT;
+ void *handle = dlopen(quantize_so.c_str(), RTLD_LAZY | RTLD_LOCAL);
+ auto dlerror_msg = dlerror();
+
+ if (handle == nullptr)
+ {
+ std::cerr << "Failed to load " << quantize_so << std::endl;
+ std::cerr << dlerror_msg << std::endl;
+ return 1;
+ }
+
+ {
+ const char *factory_name = "create_quantizer";
+ auto factory = (factory_t)dlsym(handle, factory_name);
+ dlerror_msg = dlerror();
+
+ if (factory == nullptr)
+ {
+ std::cerr << "QuantizerLoader: unable to find function " << factory_name << dlerror_msg
+ << std::endl;
+ dlclose(handle);
+ return 1;
+ }
+
+ auto destroyer = (quantizer_destory_t)dlsym(handle, "destroy_quantizer");
+ _quantizer = std::unique_ptr<IQuantizer, quantizer_destory_t>(factory(), destroyer);
+
+ if (_quantizer == nullptr)
+ {
+ std::cerr << "QuantizerLoader: unable to create quantizer" << std::endl;
+ dlclose(handle);
+ return 1;
+ }
+ }
+
+  // Keep the quantize library handle (avoids a warning about the handle being lost without dlclose())
+ // clang-format off
+ _dlhandle = std::unique_ptr<void, dlhandle_destroy_t>{handle, [filename = quantize_so](void *h) {
+ if (dlclose(h) != 0)
+ std::cerr << "Failed to unload backend " << filename << std::endl;
+ }};
+ // clang-format on
+
+ return 0;
+}
+
+int32_t QuantizerLoader::unloadLibrary()
+{
+ if (get() == nullptr)
+ return 0;
+
+ _quantizer.reset(nullptr);
+ _dlhandle.reset(nullptr);
+
+ return 0;
+}
+
+} // namespace odc
+} // namespace onert
diff --git a/runtime/onert/core/src/odc/QuantizerLoader.h b/runtime/onert/core/src/odc/QuantizerLoader.h
new file mode 100644
index 000000000..36a9f2996
--- /dev/null
+++ b/runtime/onert/core/src/odc/QuantizerLoader.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_ODC_QUANTIZER_LOADER_H__
+#define __ONERT_ODC_QUANTIZER_LOADER_H__
+
+#include "odc/IQuantizer.h"
+
+#include <functional>
+#include <memory>
+
+namespace onert
+{
+namespace odc
+{
+
+/**
+ * @brief Class to manage loading and unloading of dynamic library containing
+ * implementation of IQuantizer interface
+ */
+class QuantizerLoader
+{
+public:
+ /**
+ * @brief Typedef for function pointer to destroy loaded library handle
+ */
+ using dlhandle_destroy_t = std::function<void(void *)>;
+ /**
+ * @brief Typedef for function pointer to create instance of IQuantizer
+ */
+ using factory_t = IQuantizer *(*)();
+ /**
+ * @brief Typedef for function pointer to destroy instance of IQuantizer
+ */
+ using quantizer_destory_t = void (*)(IQuantizer *);
+
+ /**
+ * @brief Get singleton instance of QuantizerLoader
+ * @return Reference to singleton instance of QuantizerLoader
+ */
+ static QuantizerLoader &instance();
+
+private:
+ // Cannot create instance of QuantizerLoader outside of this class
+ QuantizerLoader() = default;
+ QuantizerLoader(QuantizerLoader const &) = delete;
+ QuantizerLoader &operator=(QuantizerLoader const &) = delete;
+ ~QuantizerLoader() = default;
+
+public:
+ /**
+ * @brief Load dynamic library containing implementation of IQuantizer
+ * @return 0 if success, otherwise errno value
+ */
+ int32_t loadLibrary();
+ /**
+ * @brief Unload dynamic library containing implementation of IQuantizer
+ * @return 0 if success, otherwise errno value
+ */
+ int32_t unloadLibrary();
+ /**
+ * @brief Get instance of IQuantizer created through factory method
+ * @return Pointer to instance of IQuantizer
+ */
+ IQuantizer *get() const { return _quantizer.get(); }
+
+private:
+ // Note: Keep handle to avoid svace warning of "handle lost without dlclose()"
+ std::unique_ptr<void, dlhandle_destroy_t> _dlhandle;
+ std::unique_ptr<IQuantizer, quantizer_destory_t> _quantizer{nullptr, nullptr};
+};
+
+} // namespace odc
+} // namespace onert
+
+#endif // __ONERT_ODC_QUANTIZER_LOADER_H__
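A hedged sketch of driving the loader directly, mirroring QuantizeManager::quantize() earlier in this patch; the paths and the q16 flag are placeholders, and IQuantizer::quantize() is assumed to return an integer status where 0 means success.

#include "QuantizerLoader.h"

bool runQuantizerSketch()
{
  auto &loader = onert::odc::QuantizerLoader::instance();
  if (loader.loadLibrary() != 0)
    return false; // libonert_odc.so (or .dylib on macOS) was not found

  auto *quantizer = loader.get();
  // Placeholder paths; the last argument selects int16 quantization
  auto result = quantizer->quantize("model.circle", "model.q8.circle", /*is_q16=*/false);

  loader.unloadLibrary(); // optional; QuantizeManager currently keeps it loaded (see TODO above)
  return result == 0;
}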
diff --git a/runtime/onert/core/src/odc/QuantizerLoader.test.cc b/runtime/onert/core/src/odc/QuantizerLoader.test.cc
new file mode 100644
index 000000000..112e65b27
--- /dev/null
+++ b/runtime/onert/core/src/odc/QuantizerLoader.test.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "QuantizerLoader.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert::odc;
+
+// Test QuantizerLoader singleton
+TEST(odc_QuantizerLoader, singleton)
+{
+ QuantizerLoader &loader1 = QuantizerLoader::instance();
+ QuantizerLoader &loader2 = QuantizerLoader::instance();
+ ASSERT_EQ(&loader1, &loader2);
+}
+
+// Test load quantizer library
+TEST(odc_QuantizerLoader, load)
+{
+ QuantizerLoader &loader = QuantizerLoader::instance();
+  // Unload first because a previous test may have left the library loaded
+ ASSERT_EQ(loader.unloadLibrary(), 0);
+
+ if (loader.loadLibrary() == 0)
+ {
+ // Load twice to check if it is thread-safe
+ ASSERT_EQ(loader.loadLibrary(), 0);
+ }
+}
+
+// Get quantizer function without loading quantizer library
+TEST(odc_QuantizerLoader, neg_get)
+{
+ QuantizerLoader &loader = QuantizerLoader::instance();
+  // Unload first because a previous test may have left the library loaded
+ ASSERT_EQ(loader.unloadLibrary(), 0);
+ ASSERT_EQ(loader.get(), nullptr);
+}
+
+// Check quantizer function pointer when QuantizerLoader is unloaded
+TEST(odc_QuantizerLoader, neg_unload)
+{
+ QuantizerLoader &loader = QuantizerLoader::instance();
+ if (loader.loadLibrary() == 0)
+ ASSERT_NE(loader.get(), nullptr);
+
+ ASSERT_EQ(loader.unloadLibrary(), 0);
+ ASSERT_EQ(loader.get(), nullptr);
+}
diff --git a/runtime/onert/core/src/util/ChromeTracingEventWriter.cc b/runtime/onert/core/src/util/ChromeTracingEventWriter.cc
new file mode 100644
index 000000000..c3f5179df
--- /dev/null
+++ b/runtime/onert/core/src/util/ChromeTracingEventWriter.cc
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "EventWriter.h"
+
+#include <cassert>
+#include <sstream>
+#include <utility>
+#include <vector>
+
+// json type for ChromeTracingWriter
+namespace
+{
+
+std::string quote(const std::string &value)
+{
+ std::stringstream ss;
+ ss << '"' << value << '"';
+ return ss.str();
+}
+
+std::string field(const std::string &k, const std::string &v)
+{
+ std::stringstream ss;
+ ss << quote(k) << " : " << quote(v);
+ return ss.str();
+}
+
+struct Content // One Entry in Chrome Event Trace
+{
+ std::vector<std::pair<std::string, std::string>> flds;
+ std::vector<std::pair<std::string, std::string>> args;
+};
+
+std::string object(const Content &content)
+{
+ std::stringstream ss;
+
+ ss << "{ ";
+
+ ss << field(content.flds[0].first, content.flds[0].second);
+
+ for (uint32_t n = 1; n < content.flds.size(); ++n)
+ {
+ ss << ", " << field(content.flds.at(n).first, content.flds.at(n).second);
+ }
+
+ if (content.args.size() > 0)
+ {
+ ss << ", " << quote("args") << " : { ";
+ ss << field(content.args.at(0).first, content.args.at(0).second);
+
+ for (uint32_t n = 1; n < content.args.size(); ++n)
+ {
+ ss << ", " << field(content.args.at(n).first, content.args.at(n).second);
+ }
+
+ ss << "}";
+ }
+
+ ss << " }";
+
+ return ss.str();
+}
+
+void fill(Content &content, const DurationEvent &evt, const std::string &name,
+ const std::string &tid)
+{
+ content.flds.emplace_back("name", name);
+ content.flds.emplace_back("pid", "0");
+ content.flds.emplace_back("tid", tid);
+ content.flds.emplace_back("ph", evt.ph);
+ content.flds.emplace_back("ts", evt.ts);
+ content.args = evt.args;
+}
+
+void fill(Content &content, const CounterEvent &evt)
+{
+ assert(evt.name != "");
+
+ content.flds.emplace_back("name", evt.name);
+ content.flds.emplace_back("pid", "0");
+ content.flds.emplace_back("tid", evt.tid);
+ content.flds.emplace_back("ph", evt.ph);
+ content.flds.emplace_back("ts", evt.ts);
+ content.args = evt.args;
+}
+
+std::string object(const DurationEvent &evt, const std::string &name, const std::string &tid)
+{
+ Content content;
+
+ fill(content, evt, name, tid);
+
+ return ::object(content);
+}
+
+std::string object(const CounterEvent &evt)
+{
+ Content content;
+
+ fill(content, evt);
+
+ for (auto it = evt.values.begin(); it != evt.values.end(); ++it)
+ {
+ content.args.emplace_back(it->first, it->second);
+ }
+
+ return ::object(content);
+}
+
+std::string getSessionLabel(const DurationEvent &evt)
+{
+ return "$" + std::to_string(evt.session_index) + " sess";
+}
+
+std::string getSubgLabel(const DurationEvent &evt)
+{
+ return "$" + std::to_string(evt.subg_index) + " subg";
+}
+
+std::string getOpLabel(const OpSeqDurationEvent &evt)
+{
+ return "@" + std::to_string(evt.op_index) + " " + evt.op_name;
+}
+
+std::string getLabel(const DurationEvent &evt)
+{
+ if (auto evt_ptr = dynamic_cast<const OpSeqDurationEvent *>(&evt))
+ {
+ return getOpLabel(*evt_ptr);
+ }
+ else // SubgDurationEvent
+ {
+ return getSubgLabel(evt);
+ }
+}
+
+std::string getTid(const DurationEvent &evt)
+{
+ if (auto evt_ptr = dynamic_cast<const OpSeqDurationEvent *>(&evt))
+ {
+ return getSessionLabel(*evt_ptr) + ", " + getSubgLabel(*evt_ptr) + ", " + evt_ptr->backend;
+ }
+ else // SubgDurationEvent
+ {
+ return getSessionLabel(evt) + ", " + getSubgLabel(evt);
+ }
+}
+
+} // namespace
+
+void ChromeTracingWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &recorders)
+{
+ _os << "{\n";
+ _os << " " << quote("traceEvents") << ": [\n";
+
+ for (const auto &recorder : recorders)
+ {
+ flushOneRecord(*recorder);
+ }
+
+ _os << " { }\n";
+ _os << " ]\n";
+ _os << "}\n";
+}
+
+void ChromeTracingWriter::flushOneRecord(const EventRecorder &recorder)
+{
+ for (const auto &evt : recorder.duration_events())
+ {
+ const std::string name = getLabel(*evt);
+ const std::string tid = getTid(*evt);
+
+ _os << " " << object(*evt, name, tid) << ",\n";
+ }
+
+ for (const auto &evt : recorder.counter_events())
+ {
+ _os << " " << object(evt) << ",\n";
+ }
+}
diff --git a/runtime/onert/core/src/util/ConfigSource.cc b/runtime/onert/core/src/util/ConfigSource.cc
index 45cce662e..b7fcefc7a 100644
--- a/runtime/onert/core/src/util/ConfigSource.cc
+++ b/runtime/onert/core/src/util/ConfigSource.cc
@@ -15,13 +15,15 @@
*/
#include "util/ConfigSource.h"
-#include "util/GeneralConfigSource.h"
-#include "util/EnvConfigSource.h"
+#include "util/logging.h"
+
+#include <misc/EnvConfigSource.h>
+#include <misc/GeneralConfigSource.h>
+#include <misc/IConfigSource.h>
-#include <array>
#include <algorithm>
+#include <array>
#include <cassert>
-
#include <memory>
namespace onert
@@ -29,9 +31,26 @@ namespace onert
namespace util
{
+using namespace nnfw::misc;
+
static std::unique_ptr<IConfigSource> _source;
+static std::unique_ptr<IConfigSource> _source_ext;
void config_source(std::unique_ptr<IConfigSource> &&source) { _source = std::move(source); }
+void config_source_ext(std::unique_ptr<IConfigSource> &&source) { _source_ext = std::move(source); }
+
+void setConfigKeyValues(const CfgKeyValues &keyValues)
+{
+ auto configsrc = std::make_unique<GeneralConfigSource>();
+
+ for (auto it = keyValues.begin(); it != keyValues.end(); ++it)
+ {
+ VERBOSE(NNPKG_CONFIGS) << "(" << it->first << ") = (" << it->second << ")" << std::endl;
+ configsrc->set(it->first, it->second);
+ }
+
+ onert::util::config_source_ext(std::move(configsrc));
+}
static IConfigSource *config_source()
{
@@ -67,6 +86,15 @@ static std::string getConfigOrDefault(const std::string &key)
auto ret = config_source()->get(key);
if (ret.empty())
{
+ // if env is not set, search from external
+ if (_source_ext.get())
+ {
+ ret = _source_ext.get()->get(key);
+ }
+ }
+ // if not found search from defaults
+ if (ret.empty())
+ {
auto itr = defaults.find(key);
if (itr != defaults.end())
{
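A small sketch of the new external-config path added here: pairs pushed through setConfigKeyValues() become the fallback source consulted after the primary config source and before the built-in defaults. The key name is a placeholder, and CfgKeyValues is assumed to be the string-to-string map alias declared alongside this function in util/ConfigSource.h.

#include "util/ConfigSource.h"

void pushExternalConfigSketch()
{
  onert::util::CfgKeyValues kv;        // assumed to behave like std::unordered_map<std::string, std::string>
  kv["EXAMPLE_KEY"] = "example_value"; // placeholder key, not a documented option
  onert::util::setConfigKeyValues(kv); // installed as the _source_ext fallback via GeneralConfigSource
}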
diff --git a/runtime/onert/core/src/util/EventCollector.cc b/runtime/onert/core/src/util/EventCollector.cc
index de37276bf..c1b9c4315 100644
--- a/runtime/onert/core/src/util/EventCollector.cc
+++ b/runtime/onert/core/src/util/EventCollector.cc
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "util/EventCollector.h"
+#include "EventCollector.h"
// C++ standard libraries
#include <chrono>
@@ -30,24 +30,62 @@ std::string timestamp(void)
{
auto now = std::chrono::steady_clock::now();
return std::to_string(
- std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count());
+ std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count());
}
-class DurationEventBuilder
+class DurationEventBuilder : public EventCollector::EventVisitor
{
public:
DurationEventBuilder(const std::string &ts) : _ts{ts} {}
- DurationEvent build(const std::string &tid, const std::string &name, const std::string &ph) const
+ std::unique_ptr<SubgDurationEvent> build(const EventCollector::SubgEvent &evt_collected,
+ const std::string &ph) const
{
- DurationEvent evt;
+ auto dur_evt = std::make_unique<SubgDurationEvent>();
- evt.name = name;
- evt.tid = tid;
- evt.ph = ph;
- evt.ts = _ts;
+ // The following will be set by a child of EventsWriter:
+ // dur_evt.name, dur_evt.tid
+ dur_evt->ph = ph;
+ dur_evt->ts = _ts;
+ dur_evt->tracing_ctx = evt_collected.tracing_ctx;
- return evt;
+ dur_evt->session_index = evt_collected.session_index;
+ dur_evt->subg_index = evt_collected.subg_index;
+
+ dur_evt->args = evt_collected.userData;
+ {
+ dur_evt->args.emplace_back("session", std::to_string(evt_collected.session_index));
+ dur_evt->args.emplace_back("subgraph", std::to_string(evt_collected.subg_index));
+ }
+
+ return dur_evt;
+ }
+
+ std::unique_ptr<OpSeqDurationEvent> build(const EventCollector::OpSeqEvent &evt_collected,
+ const std::string &ph) const
+ {
+ auto dur_evt = std::make_unique<OpSeqDurationEvent>();
+
+ // The following will be set by a child of EventsWriter:
+ // dur_evt.name, dur_evt.tid
+ dur_evt->ph = ph;
+ dur_evt->ts = _ts;
+ dur_evt->tracing_ctx = evt_collected.tracing_ctx;
+
+ dur_evt->session_index = evt_collected.session_index;
+ dur_evt->subg_index = evt_collected.subg_index;
+
+ dur_evt->backend = evt_collected.backend;
+ dur_evt->op_index = evt_collected.op_index;
+ dur_evt->op_name = evt_collected.op_name;
+
+ dur_evt->args = evt_collected.userData;
+ {
+ dur_evt->args.emplace_back("session", std::to_string(evt_collected.session_index));
+ dur_evt->args.emplace_back("subgraph", std::to_string(evt_collected.subg_index));
+ }
+
+ return dur_evt;
}
private:
@@ -86,19 +124,26 @@ inline void emit_rusage(EventRecorder *rec, const std::string &ts)
} // namespace
-void EventCollector::onEvent(const Event &event)
+template <typename EventT> void EventCollector::onEvent(const EventT &event)
{
auto ts = timestamp();
+ DurationEventBuilder builder(ts);
+
switch (event.edge)
{
case Edge::BEGIN:
- _rec->emit(DurationEventBuilder(ts).build(event.backend, event.label, "B"));
+ {
+ auto duration_evt = builder.build(event, "B");
+ _rec->emit(std::move(duration_evt));
break;
-
+ }
case Edge::END:
- _rec->emit(DurationEventBuilder(ts).build(event.backend, event.label, "E"));
+ {
+ auto duration_evt = builder.build(event, "E");
+ _rec->emit(std::move(duration_evt));
break;
+ }
}
  // TODO: Add resource measurement (e.g. RSS)
@@ -107,3 +152,7 @@ void EventCollector::onEvent(const Event &event)
emit_rusage(_rec, ts);
#endif
}
+
+// template instantiation
+template void EventCollector::onEvent<EventCollector::SubgEvent>(const SubgEvent &event);
+template void EventCollector::onEvent<EventCollector::OpSeqEvent>(const OpSeqEvent &event);
diff --git a/runtime/onert/core/src/util/EventCollector.h b/runtime/onert/core/src/util/EventCollector.h
index 8154be592..effb72373 100644
--- a/runtime/onert/core/src/util/EventCollector.h
+++ b/runtime/onert/core/src/util/EventCollector.h
@@ -17,7 +17,13 @@
#ifndef __ONERT_UTIL_EVENT_COLLECTOR_H__
#define __ONERT_UTIL_EVENT_COLLECTOR_H__
-#include "util/EventRecorder.h"
+#include "EventRecorder.h"
+
+#include "util/TracingCtx.h"
+
+#include <string>
+#include <utility>
+#include <vector>
class EventCollector
{
@@ -28,11 +34,69 @@ public:
END
};
+ struct SubgEvent;
+ struct OpEvent;
+
+ class EventVisitor
+ {
+ public:
+ virtual ~EventVisitor() = default;
+
+ virtual std::unique_ptr<DurationEvent> visit(const SubgEvent &, const std::string &) const
+ {
+ throw std::runtime_error("Please implement");
+ }
+ virtual std::unique_ptr<DurationEvent> visit(const OpEvent &, const std::string &) const
+ {
+ throw std::runtime_error("Please implement");
+ }
+ };
+
struct Event
{
+ const onert::util::TracingCtx *tracing_ctx;
+
Edge edge;
+ uint32_t session_index;
+ uint32_t subg_index;
+
+ // user-defined data: pairs of (key, value)
+ std::vector<std::pair<std::string, std::string>> userData;
+
+ protected:
+ Event(const onert::util::TracingCtx *a_tracing_ctx, Edge a_edge, uint32_t a_subg_index)
+ : tracing_ctx(a_tracing_ctx), edge(a_edge), session_index(tracing_ctx->getSessionId()),
+ subg_index(a_subg_index)
+ { /* empty */
+ }
+
+ virtual ~Event() = default;
+ };
+
+ struct SubgEvent : public Event
+ {
+ // constructor for subgraph start and end event
+ SubgEvent(const onert::util::TracingCtx *a_tracing_ctx, Edge a_edge, uint32_t a_subg_index)
+ : Event(a_tracing_ctx, a_edge, a_subg_index)
+ { /* empty */
+ }
+ };
+
+ // TODO Rename this to OperationEvent
+ struct OpSeqEvent : public Event
+ {
std::string backend;
- std::string label;
+ uint32_t op_index;
+ std::string op_name;
+
+ OpSeqEvent(const onert::util::TracingCtx *a_tracing_ctx, Edge a_edge, uint32_t a_subg_index,
+ const std::string a_backend, uint32_t a_op_index, const std::string a_op_name)
+ : Event(a_tracing_ctx, a_edge, a_subg_index)
+ {
+ backend.assign(a_backend);
+ op_index = a_op_index;
+ op_name.assign(a_op_name);
+ }
};
public:
@@ -42,7 +106,7 @@ public:
}
public:
- void onEvent(const Event &event);
+ template <typename EventT> void onEvent(const EventT &event);
protected:
EventRecorder *_rec;
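A hedged sketch of feeding the reworked collector, assuming an EventCollector already wired to an EventRecorder (its constructor lies outside these hunks) and a valid TracingCtx pointer; the subgraph index, backend name, operation index, and operation name are placeholders.

#include "EventCollector.h"

void traceSketch(EventCollector &collector, const onert::util::TracingCtx *tracing_ctx)
{
  // Subgraph-level BEGIN/END pair (built into a SubgDurationEvent by DurationEventBuilder)
  collector.onEvent(EventCollector::SubgEvent{tracing_ctx, EventCollector::Edge::BEGIN, 0});
  // ... run subgraph 0 ...
  collector.onEvent(EventCollector::SubgEvent{tracing_ctx, EventCollector::Edge::END, 0});

  // Operation-level pair; backend, op index, and op name are placeholders
  collector.onEvent(
    EventCollector::OpSeqEvent{tracing_ctx, EventCollector::Edge::BEGIN, 0, "cpu", 3, "Conv2D"});
  collector.onEvent(
    EventCollector::OpSeqEvent{tracing_ctx, EventCollector::Edge::END, 0, "cpu", 3, "Conv2D"});
}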
diff --git a/runtime/onert/core/src/util/EventCollectorGlobal.cc b/runtime/onert/core/src/util/EventCollectorGlobal.cc
deleted file mode 100644
index d09b95210..000000000
--- a/runtime/onert/core/src/util/EventCollectorGlobal.cc
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "util/EventCollectorGlobal.h"
-
-#include <cassert>
-#include <fstream>
-#include <iostream>
-
-#include "util/ConfigSource.h"
-
-namespace onert
-{
-namespace util
-{
-
-EventCollectorGlobal::EventCollectorGlobal() : _recorder{}, _collector{&_recorder}
-{
- // DO NOTHING
-}
-
-EventCollectorGlobal::~EventCollectorGlobal()
-{
- if (!_recorder.empty())
- {
- try
- {
- // TODO Need better way for saved file path than the hardcoded path
- std::ofstream ofs{"trace.global.json"};
- _recorder.writeToFile(ofs);
- }
- catch (const std::exception &e)
- {
- std::cerr << "E: Fail to record event in EventCollectorGlobal: " << e.what() << std::endl;
- }
- }
-}
-
-EventCollectorGlobal &EventCollectorGlobal::get()
-{
- static EventCollectorGlobal instance;
- return instance;
-}
-
-EventDurationBlock::EventDurationBlock(const std::string &tag) : _tag{tag}
-{
- auto &glob = EventCollectorGlobal::get();
- glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "0", _tag});
-}
-EventDurationBlock::~EventDurationBlock()
-{
- auto &glob = EventCollectorGlobal::get();
- glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::END, "0", _tag});
-}
-
-EventDurationManual::EventDurationManual(const std::string &tag) : _tag{tag}, _pair{true} {}
-
-EventDurationManual::~EventDurationManual()
-{
- // Check if it has called begin-end pair
- assert(_pair);
-}
-
-void EventDurationManual::begin()
-{
- _pair = false;
- auto &glob = EventCollectorGlobal::get();
- glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "0", _tag});
-}
-
-void EventDurationManual::end()
-{
- assert(!_pair);
- _pair = true;
- auto &glob = EventCollectorGlobal::get();
- glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::END, "0", _tag});
-}
-
-} // namespace util
-} // namespace onert
diff --git a/runtime/onert/core/src/util/EventCollectorGlobal.h b/runtime/onert/core/src/util/EventCollectorGlobal.h
deleted file mode 100644
index 1027ec84d..000000000
--- a/runtime/onert/core/src/util/EventCollectorGlobal.h
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_UTIL_EVENT_COLLECTOR_GLOBAL_H__
-#define __ONERT_UTIL_EVENT_COLLECTOR_GLOBAL_H__
-
-#include "util/EventRecorder.h"
-#include "util/EventCollector.h"
-
-namespace onert
-{
-namespace util
-{
-
-/**
- * @brief Singleton class for event collection from anywhere in code
- *
- */
-class EventCollectorGlobal
-{
-public:
- /**
- * @brief Get the singleton object of this class
- *
- * @return EventCollectorGlobal& Singleton object
- */
- static EventCollectorGlobal &get();
-
-public:
- /**
- * @brief Getter for event collector object
- *
- * @return EventCollector& Collector object
- */
- EventCollector &collector() { return _collector; }
-
-private:
- EventCollectorGlobal();
- ~EventCollectorGlobal();
-
-private:
- EventRecorder _recorder;
- EventCollector _collector;
-};
-
-/**
- * @brief Helper class for emitting duration event which is handled automatically with ctor/dtor
- *
- */
-class EventDurationBlock
-{
-public:
- /**
- * @brief Raise a duration event with type of BEGIN
- *
- * @param tag A label for the duration event
- */
- EventDurationBlock(const std::string &tag);
- /**
- * @brief Raise a duration event with type of END
- *
- */
- ~EventDurationBlock();
-
-private:
- std::string _tag;
-};
-
-/**
- * @brief Helper class for emitting duration event which is handled manually
- *
- * Usage:
- * {
- * ...
- * EventDurationManual duration("some tag");
- * duration.begin();
- * ...
- * ... // Code for duration
- * ...
- * duration.end();
- * }
- *
- */
-class EventDurationManual
-{
-public:
- /**
- * @brief Construct a new Event Duration Manual object
- *
- * @param tag A label for the duration object
- */
- EventDurationManual(const std::string &tag);
- /**
- * @brief Destroy the Event Duration Manual object
- *
- */
- ~EventDurationManual();
-
- /**
- * @brief Raise a duration event with type of BEGIN
- *
- */
- void begin();
- /**
- * @brief Raise a duration event with type of END
- *
- */
- void end();
-
-private:
- std::string _tag;
- bool _pair;
-};
-
-} // namespace util
-} // namespace onert
-
-/**
- * Helper Macro Definitions
- *
- * HOW TO USE
- *
- * void f(args)
- * {
- * EVENT_DURATION_FUNCTION();
- * ...
- * if(cond)
- * {
- * EVENT_DURATION_REGION("if branch");
- * ...
- * }
- * ...
- * }
- */
-
-#define EVENT_DURATION_FUNCTION() \
- ::onert::util::EventDurationBlock __event_duration__##__LINE__ { __FUNCTION__ }
-
-#define EVENT_DURATION_REGION(tag) \
- ::onert::util::EventDurationBlock __event_duration__##__LINE__ { tag }
-
-#endif // __ONERT_UTIL_EVENT_COLLECTOR_GLOBAL_H__
diff --git a/runtime/onert/core/src/util/EventRecorder.cc b/runtime/onert/core/src/util/EventRecorder.cc
index 13a599bed..85a588d38 100644
--- a/runtime/onert/core/src/util/EventRecorder.cc
+++ b/runtime/onert/core/src/util/EventRecorder.cc
@@ -14,396 +14,13 @@
* limitations under the License.
*/
-#include "util/EventRecorder.h"
+#include "EventRecorder.h"
-#include <sstream>
-#include <vector>
-#include <unordered_map>
-#include <json/json.h>
-#include <assert.h>
-#include <utility>
-#include <map>
-#include <set>
-#include <stdint.h>
-
-// json type for Chrome Event Trace
-namespace
-{
-
-std::string quote(const std::string &value)
-{
- std::stringstream ss;
- ss << '"' << value << '"';
- return ss.str();
-}
-
-std::string field(const std::string &k, const std::string &v)
-{
- std::stringstream ss;
- ss << quote(k) << " : " << quote(v);
- return ss.str();
-}
-
-struct Content // One Entry in Chrome Event Trace
-{
- std::vector<std::pair<std::string, std::string>> flds;
- std::vector<std::pair<std::string, std::string>> args;
-};
-
-std::string object(const Content &content)
-{
- std::stringstream ss;
-
- ss << "{ ";
-
- ss << field(content.flds[0].first, content.flds[0].second);
-
- for (uint32_t n = 1; n < content.flds.size(); ++n)
- {
- ss << ", " << field(content.flds.at(n).first, content.flds.at(n).second);
- }
-
- if (content.args.size() > 0)
- {
- ss << ", " << quote("args") << " : { ";
- ss << field(content.args.at(0).first, content.args.at(0).second);
-
- for (uint32_t n = 1; n < content.args.size(); ++n)
- {
- ss << ", " << field(content.args.at(n).first, content.args.at(n).second);
- }
-
- ss << "}";
- }
-
- ss << " }";
-
- return ss.str();
-}
-
-void fill(Content &content, const Event &evt)
-{
- content.flds.emplace_back("name", evt.name);
- content.flds.emplace_back("pid", "0");
- content.flds.emplace_back("tid", evt.tid);
- content.flds.emplace_back("ph", evt.ph);
- content.flds.emplace_back("ts", evt.ts);
-}
-
-std::string object(const DurationEvent &evt)
-{
- Content content;
-
- fill(content, evt);
-
- return ::object(content);
-}
-
-std::string object(const CounterEvent &evt)
-{
- Content content;
-
- fill(content, evt);
-
- for (auto it = evt.values.begin(); it != evt.values.end(); ++it)
- {
- content.args.emplace_back(it->first, it->second);
- }
-
- return ::object(content);
-}
-
-} // namespace
-
-// md table type
-namespace
-{
-
-void writeMDTableRow(std::ostream &os, const std::vector<std::string> &list)
-{
- os << "| ";
- for (auto &key : list)
- {
- os << key << " | ";
- }
- os << "\n";
-}
-
-struct MDContent
-{
- std::string name;
- uint64_t begin_ts;
- uint64_t end_ts;
- uint32_t min_rss;
- uint32_t max_rss;
- uint32_t min_page_reclaims;
- uint32_t max_page_reclaims;
-
- MDContent()
- : begin_ts(0), end_ts(0), min_rss(UINT32_MAX), max_rss(0), min_page_reclaims(UINT32_MAX),
- max_page_reclaims(0)
- {
- // DO NOTHING
- }
-
- virtual ~MDContent() = default;
-
- void updateRss(uint32_t rss)
- {
- if (min_rss == UINT32_MAX)
- min_rss = rss;
- if (max_rss == 0)
- max_rss = rss;
-
- if (min_rss > rss)
- min_rss = rss;
- else if (max_rss < rss)
- max_rss = rss;
- }
-
- void updateMinflt(uint32_t minflt)
- {
- if (min_page_reclaims == UINT32_MAX)
- min_page_reclaims = minflt;
- if (max_page_reclaims == 0)
- max_page_reclaims = minflt;
-
- if (min_page_reclaims > minflt)
- min_page_reclaims = minflt;
- else if (max_page_reclaims < minflt)
- max_page_reclaims = minflt;
- }
-
- virtual void write(std::ostream &os) const = 0;
-};
-
-struct OpSeq : public MDContent
-{
- std::string backend;
- uint64_t graph_latency;
-
- struct OpSeqCmp
- {
- bool operator()(const OpSeq &lhs, const OpSeq &rhs) const
- {
- return lhs.begin_ts < rhs.begin_ts;
- }
- bool operator()(const OpSeq &lhs, const OpSeq &rhs) { return lhs.begin_ts < rhs.begin_ts; }
- bool operator()(OpSeq &lhs, OpSeq &rhs) { return lhs.begin_ts < rhs.begin_ts; }
- };
-
- void write(std::ostream &os) const override
- {
- uint64_t opseq_latency = end_ts - begin_ts;
- double opseq_per = static_cast<double>(opseq_latency) / graph_latency * 100.0;
- writeMDTableRow(os, {name, backend, std::to_string(opseq_latency), std::to_string(opseq_per),
- std::to_string(min_rss), std::to_string(max_rss),
- std::to_string(min_page_reclaims), std::to_string(max_page_reclaims)});
- }
-};
-
-struct Graph : public MDContent
-{
- std::set<OpSeq, OpSeq::OpSeqCmp> opseqs;
-
- void setOpSeqs(const std::map<std::string, OpSeq> &name_to_opseq)
- {
- uint64_t graph_latency = end_ts - begin_ts;
- for (auto it : name_to_opseq)
- {
- auto opseq = it.second;
- opseq.graph_latency = graph_latency;
-
- opseqs.insert(opseq);
-
- updateRss(opseq.min_rss);
- updateRss(opseq.max_rss);
- updateMinflt(opseq.min_page_reclaims);
- updateMinflt(opseq.max_page_reclaims);
- }
- }
-
- void write(std::ostream &os) const override
- {
- static std::vector<std::string> graph_headers{"latency(us)", "rss_min(kb)", "rss_max(kb)",
- "page_reclaims_min", "page_reclaims_max"};
-
- static std::vector<std::string> graph_headers_line{"-----------", "-------", "-------",
- "-----------------", "-----------------"};
-
- // Graph's Header
- writeMDTableRow(os, graph_headers);
- writeMDTableRow(os, graph_headers_line);
-
- // Graph's contents
- writeMDTableRow(os, {std::to_string(end_ts - begin_ts), std::to_string(min_rss),
- std::to_string(max_rss), std::to_string(min_page_reclaims),
- std::to_string(max_page_reclaims)});
-
- os << "\n";
-
- static std::vector<std::string> opseq_headers{
- "OpSeq name", "backend", "latency(us)", "latency(%)",
- "rss_min(kb)", "rss_max(kb)", "page_reclaims_min", "page_reclaims_max"};
-
- static std::vector<std::string> opseq_headers_line{
- "----------", "-------", "-----------", "-----------",
- "-------", "-------", "-----------------", "-----------------"};
-
- os << "## OpSequences \n";
-
- // OpSeq's Header
- writeMDTableRow(os, opseq_headers);
- writeMDTableRow(os, opseq_headers_line);
-
- // OpSeq's contents
- for (auto opseq : opseqs)
- {
- opseq.write(os);
- }
-
- os << "\n";
- }
-};
-
-struct MDTableBuilder
-{
- MDTableBuilder(const std::vector<DurationEvent> &duration_events,
- const std::vector<CounterEvent> &counter_events)
- : _duration_events(duration_events), _counter_events(counter_events)
- {
- for (const auto &evt : _counter_events)
- {
- uint64_t ts = std::stoull(evt.ts);
- auto &name = evt.name;
- assert(name.compare("maxrss") == 0 || name.compare("minflt") == 0);
- assert(evt.values.size() == 1);
- auto &val = evt.values.begin()->second;
- if (_ts_to_values.find(ts) == _ts_to_values.end())
- {
- std::pair<uint32_t, uint32_t> values;
- if (name.compare("maxrss") == 0)
- values.first = std::stoul(val);
- else
- values.second = std::stoul(val);
- _ts_to_values.insert({ts, values});
- }
- else
- {
- auto &values = _ts_to_values.at(ts);
- if (name.compare("maxrss") == 0)
- values.first = std::stoul(val);
- else
- values.second = std::stoul(val);
- }
- }
- }
-
- MDTableBuilder &build()
- {
- for (auto &it : divideGraph())
- {
- size_t begin_idx = it.first;
- size_t end_idx = it.second;
- std::map<std::string, OpSeq> name_to_opseq;
- for (size_t i = begin_idx + 1; i < end_idx; ++i)
- {
- const auto &evt = _duration_events[i];
- assert(evt.name.compare("Graph") != 0);
- assert(evt.ph.compare("B") == 0 || evt.ph.compare("E") == 0);
- if (evt.ph.compare("B") == 0)
- {
- assert(name_to_opseq.find(evt.name) == name_to_opseq.end());
- name_to_opseq.insert({evt.name, makeOpSeq(evt)});
- }
- else
- {
- assert(name_to_opseq.find(evt.name) != name_to_opseq.end());
- auto &opseq = name_to_opseq.at(evt.name);
- updateOpSeq(opseq, evt);
- }
- }
-
- _graphs.emplace_back(makeGraph(begin_idx, end_idx, name_to_opseq));
- }
-
- return *this;
- }
-
- std::vector<std::pair<size_t, size_t>> divideGraph()
- {
- std::vector<std::pair<size_t, size_t>> graph_idx_list; // pair<begin_idx, end_idx>
- for (size_t i = 0, begin_idx = 0; i < _duration_events.size(); ++i)
- {
- const auto &evt = _duration_events.at(i);
- if (evt.name.compare("Graph") == 0)
- {
- if (evt.ph.compare("B") == 0)
- begin_idx = i;
- else
- graph_idx_list.emplace_back(begin_idx, i);
- }
- }
- return graph_idx_list;
- }
-
- OpSeq makeOpSeq(const DurationEvent &evt)
- {
- OpSeq opseq;
- opseq.name = evt.name;
- opseq.begin_ts = std::stoull(evt.ts);
- opseq.updateRss(_ts_to_values.at(opseq.begin_ts).first);
- opseq.updateMinflt(_ts_to_values.at(opseq.begin_ts).second);
- opseq.backend = evt.tid;
- return opseq;
- }
-
- void updateOpSeq(OpSeq &opseq, const DurationEvent &evt)
- {
- opseq.end_ts = std::stoull(evt.ts);
- opseq.updateRss(_ts_to_values.at(opseq.end_ts).first);
- opseq.updateMinflt(_ts_to_values.at(opseq.end_ts).second);
- }
-
- Graph makeGraph(size_t begin_idx, size_t end_idx,
- const std::map<std::string, OpSeq> &name_to_opseq)
- {
- Graph graph;
- graph.name = "Graph";
- graph.begin_ts = std::stoull(_duration_events[begin_idx].ts);
- graph.updateRss(_ts_to_values.at(graph.begin_ts).first);
- graph.updateMinflt(_ts_to_values.at(graph.begin_ts).second);
- graph.end_ts = std::stoull(_duration_events[end_idx].ts);
- graph.updateRss(_ts_to_values.at(graph.end_ts).first);
- graph.updateMinflt(_ts_to_values.at(graph.end_ts).second);
- graph.setOpSeqs(name_to_opseq);
- return graph;
- }
-
- void write(std::ostream &os)
- {
- // Write contents
- for (size_t i = 0; i < _graphs.size(); ++i)
- {
- os << "# Graph " << i << "\n";
- _graphs.at(i).write(os);
- }
- }
-
- const std::vector<DurationEvent> &_duration_events;
- const std::vector<CounterEvent> &_counter_events;
- // timestamp to std::pair<maxrss, minflt>
- std::unordered_map<uint64_t, std::pair<uint32_t, uint32_t>> _ts_to_values;
- std::vector<Graph> _graphs;
-};
-
-} // namespace
-
-void EventRecorder::emit(const DurationEvent &evt)
+void EventRecorder::emit(std::unique_ptr<DurationEvent> &&evt)
{
std::lock_guard<std::mutex> lock{_mu};
- _duration_events.push_back(evt);
+ _duration_events.push_back(std::move(evt));
}
void EventRecorder::emit(const CounterEvent &evt)
@@ -412,146 +29,3 @@ void EventRecorder::emit(const CounterEvent &evt)
_counter_events.push_back(evt);
}
-
-void EventRecorder::writeToFile(std::ostream &os)
-{
- std::lock_guard<std::mutex> lock{_mu};
-
- switch (_write_format)
- {
- case WriteFormat::CHROME_TRACING:
- writeChromeTrace(os);
- break;
- case WriteFormat::SNPE_BENCHMARK:
- writeSNPEBenchmark(os);
- break;
- case WriteFormat::MD_TABLE:
- writeMDTable(os);
- break;
- default:
- assert(!"Invalid value");
- break;
- }
-}
-
-void EventRecorder::writeSNPEBenchmark(std::ostream &os)
-{
- Json::Value root;
- auto &exec_data = root["Execution_Data"] = Json::Value{Json::objectValue};
-
- struct Stat
- {
- uint64_t sum = 0;
- uint64_t count = 0;
- uint64_t max = 0;
- uint64_t min = std::numeric_limits<uint64_t>::max();
-
- void accumulate(uint64_t val)
- {
- sum += val;
- count++;
- max = std::max(max, val);
- min = std::min(min, val);
- }
- };
-
- // Memory
- {
- std::unordered_map<std::string, Stat> mem_stats;
- for (auto &evt : _counter_events)
- {
- auto &mem_stat = mem_stats[evt.name];
- uint64_t val = std::stoull(evt.values["value"]);
- mem_stat.accumulate(val);
- }
-
- auto &mem = exec_data["memory"] = Json::Value{Json::objectValue};
- for (auto &kv : mem_stats)
- {
- auto &key = kv.first;
- auto &val = kv.second;
- mem[key]["Avg_Size"] = val.sum / val.count;
- mem[key]["Max_Size"] = val.max;
- mem[key]["Min_Size"] = val.min;
- mem[key]["Runtime"] = "NA";
- }
- }
-
- // Operation Execution Time
- {
- // NOTE This assumes _duration_events is sorted by "ts" ascending
-
- // 2D keys : stats[tid][name]
- std::unordered_map<std::string, std::unordered_map<std::string, Stat>> stats;
- std::unordered_map<std::string, std::unordered_map<std::string, uint64_t>> begin_timestamps;
- for (auto &evt : _duration_events)
- {
- auto &stat = stats[evt.tid][evt.name];
- auto &begin_ts = begin_timestamps[evt.tid][evt.name];
- uint64_t timestamp = std::stoull(evt.ts);
- if (evt.ph == "B")
- {
- if (begin_ts != 0)
- throw std::runtime_error{"Invalid Data"};
- begin_ts = timestamp;
- }
- else if (evt.ph == "E")
- {
- if (begin_ts == 0 || timestamp < begin_ts)
- throw std::runtime_error{"Invalid Data"};
- stat.accumulate(timestamp - begin_ts);
- begin_ts = 0;
- }
- else
- throw std::runtime_error{"Invalid Data - invalid value for \"ph\" : \"" + evt.ph + "\""};
- }
-
- for (auto &kv : begin_timestamps)
- for (auto &kv2 : kv.second)
- if (kv2.second != 0)
- throw std::runtime_error{"Invalid Data - B and E pair does not match."};
-
- for (auto &kv : stats)
- {
- auto &tid = kv.first;
- auto &map = kv.second;
- auto &json_tid = exec_data[tid] = Json::Value{Json::objectValue};
- for (auto &kv : map)
- {
- auto &name = kv.first;
- auto &val = kv.second;
- json_tid[name]["Avg_Time"] = val.sum / val.count;
- json_tid[name]["Max_Time"] = val.max;
- json_tid[name]["Min_Time"] = val.min;
- json_tid[name]["Runtime"] = tid;
- }
- }
- }
-
- os << root;
-}
-
-void EventRecorder::writeChromeTrace(std::ostream &os)
-{
- os << "{\n";
- os << " " << quote("traceEvents") << ": [\n";
-
- for (auto &evt : _duration_events)
- {
- os << " " << object(evt) << ",\n";
- }
-
- for (auto &evt : _counter_events)
- {
- os << " " << object(evt) << ",\n";
- }
-
- os << " { }\n";
- os << " ]\n";
- os << "}\n";
-}
-
-void EventRecorder::writeMDTable(std::ostream &os)
-{
- MDTableBuilder(_duration_events, _counter_events).build().write(os);
-}
diff --git a/runtime/onert/core/src/util/EventRecorder.h b/runtime/onert/core/src/util/EventRecorder.h
index 37ec1a0f1..5cf03d8ac 100644
--- a/runtime/onert/core/src/util/EventRecorder.h
+++ b/runtime/onert/core/src/util/EventRecorder.h
@@ -17,28 +17,52 @@
#ifndef __ONERT_UTIL_EVENT_RECORDER_H__
#define __ONERT_UTIL_EVENT_RECORDER_H__
+#include "util/TracingCtx.h"
+
#include <map>
#include <memory>
#include <mutex>
-#include <ostream>
#include <vector>
+// refer to https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/edit#
struct Event
{
- std::string name;
- std::string tid;
- std::string ph; /* REQUIRED */
- std::string ts; /* REQUIRED */
+ const onert::util::TracingCtx *tracing_ctx;
+
+ std::string ph; // Event type.
+ std::string ts; // timestamp of this event on the tracing clock
+ std::vector<std::pair<std::string, std::string>> args; // user-defined data: pairs of (key, value)
+
+ virtual ~Event() = default;
};
struct DurationEvent : public Event
{
- // TO BE FILLED
+ uint32_t session_index = 0;
+ uint32_t subg_index = 0;
+
+protected:
+ DurationEvent() = default;
+};
+
+struct SubgDurationEvent : public DurationEvent
+{ /* same as DurationEvent */
+};
+
+// TODO Rename it to OperationDurationEvent
+struct OpSeqDurationEvent : public DurationEvent
+{
+ // Note: DurationEvent's name and tid will be set by EventWriter
+ std::string backend;
+ uint32_t op_index;
+ std::string op_name;
};
struct CounterEvent : public Event
{
+ std::string name; // name of event
+ std::string tid; // thread ID
std::map<std::string, std::string> values;
};
@@ -50,35 +74,22 @@ struct CounterEvent : public Event
class EventRecorder
{
public:
- enum class WriteFormat
- {
- CHROME_TRACING,
- SNPE_BENCHMARK,
- MD_TABLE,
- };
-
-public:
EventRecorder() = default;
public:
- void emit(const DurationEvent &evt);
+ void emit(std::unique_ptr<DurationEvent> &&evt);
void emit(const CounterEvent &evt);
public:
- bool empty() { return _duration_events.empty() && _counter_events.empty(); }
- void writeToFile(std::ostream &os);
- void setWriteFormat(WriteFormat write_format) { _write_format = write_format; }
-
-private:
- void writeSNPEBenchmark(std::ostream &os);
- void writeChromeTrace(std::ostream &os);
- void writeMDTable(std::ostream &os);
+ const std::vector<std::unique_ptr<DurationEvent>> &duration_events() const
+ {
+ return _duration_events;
+ }
+ const std::vector<CounterEvent> &counter_events() const { return _counter_events; }
private:
std::mutex _mu;
- // TODO: Allow user to control write_format
- WriteFormat _write_format{WriteFormat::SNPE_BENCHMARK};
- std::vector<DurationEvent> _duration_events;
+ std::vector<std::unique_ptr<DurationEvent>> _duration_events;
std::vector<CounterEvent> _counter_events;
};
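
A minimal caller sketch (not part of this patch; the function, backend name, and values are made up for illustration) of how the ownership-passing emit() and the OpSeqDurationEvent subclass fit together after this change:

#include <memory>
#include <string>
// assumes the EventRecorder.h declared above is on the include path

void recordOpBegin(EventRecorder &recorder, const onert::util::TracingCtx *ctx)
{
  auto evt = std::make_unique<OpSeqDurationEvent>();
  evt->tracing_ctx = ctx;        // shared tracing context
  evt->ph = "B";                 // "B" marks the begin of a duration
  evt->ts = "1000";              // timestamps remain strings, as before
  evt->session_index = 0;
  evt->subg_index = 0;
  evt->backend = "cpu";
  evt->op_index = 3;
  evt->op_name = "Conv2D";
  recorder.emit(std::move(evt)); // the recorder takes ownership of the event
}
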
diff --git a/runtime/onert/core/src/util/EventWriter.cc b/runtime/onert/core/src/util/EventWriter.cc
new file mode 100644
index 000000000..ca4bd302e
--- /dev/null
+++ b/runtime/onert/core/src/util/EventWriter.cc
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "EventWriter.h"
+
+#include <cassert>
+
+// initialization
+std::mutex EventWriter::_mutex;
+
+void EventWriter::readyToFlush(std::unique_ptr<EventRecorder> &&recorder)
+{
+ {
+ std::unique_lock<std::mutex> lock{_mutex};
+
+ _recorders.emplace_back(std::move(recorder));
+
+ if (--_ref_count > 0)
+ return;
+ }
+ // The caller of this method is the last instance that uses EventWriter.
+ // Let's write log files.
+
+ // Note: per an internal issue, the SNPE JSON is written to the given file name as-is, not '<name>.snpe.json'
+ flush(WriteFormat::SNPE_BENCHMARK);
+ flush(WriteFormat::CHROME_TRACING);
+ flush(WriteFormat::MD_TABLE);
+}
+
+void EventWriter::flush(WriteFormat write_format)
+{
+ auto *writer = _actual_writers[write_format].get();
+ assert(writer);
+
+ writer->flush(_recorders);
+}
diff --git a/runtime/onert/core/src/util/EventWriter.h b/runtime/onert/core/src/util/EventWriter.h
new file mode 100644
index 000000000..0a35a8508
--- /dev/null
+++ b/runtime/onert/core/src/util/EventWriter.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_UTIL_EVENT_WRITER_H__
+#define __ONERT_UTIL_EVENT_WRITER_H__
+
+#include "EventRecorder.h"
+
+#include <string>
+#include <vector>
+#include <unordered_map>
+#include <mutex>
+#include <fstream>
+
+class EventFormatWriter
+{
+public:
+ EventFormatWriter(const std::string &filepath) : _os{filepath, std::ofstream::out} {}
+ virtual ~EventFormatWriter()
+ { /* empty */
+ }
+
+ virtual void flush(const std::vector<std::unique_ptr<EventRecorder>> &) = 0;
+
+protected:
+ std::ofstream _os;
+};
+
+class SNPEWriter : public EventFormatWriter
+{
+public:
+ SNPEWriter(const std::string &filepath) : EventFormatWriter(filepath)
+ { /* empty */
+ }
+ ~SNPEWriter() {}
+
+ void flush(const std::vector<std::unique_ptr<EventRecorder>> &) override;
+};
+
+class ChromeTracingWriter : public EventFormatWriter
+{
+public:
+ ChromeTracingWriter(const std::string &filepath) : EventFormatWriter(filepath)
+ { /* empty */
+ }
+ ~ChromeTracingWriter() {}
+
+ void flush(const std::vector<std::unique_ptr<EventRecorder>> &) override;
+
+private:
+ void flushOneRecord(const EventRecorder &);
+};
+
+class MDTableWriter : public EventFormatWriter
+{
+public:
+ MDTableWriter(const std::string &filepath) : EventFormatWriter(filepath)
+ { /* empty */
+ }
+ ~MDTableWriter() {}
+
+ void flush(const std::vector<std::unique_ptr<EventRecorder>> &) override;
+};
+
+class EventWriter
+{
+public:
+ enum class WriteFormat
+ {
+ CHROME_TRACING,
+ SNPE_BENCHMARK,
+ MD_TABLE,
+ };
+
+ /**
+ * @brief Returns the singleton object
+ */
+ static EventWriter *get(const std::string &filename)
+ {
+ std::unique_lock<std::mutex> lock{_mutex};
+
+ static EventWriter singleton(filename);
+ return &singleton;
+ }
+
+ /**
+ * @brief Call this when an observer that uses EventWriter starts
+ */
+ void startToUse()
+ {
+ std::unique_lock<std::mutex> lock{_mutex};
+ _ref_count++;
+ }
+
+ /**
+ * @brief Call this when an observer that uses EventWriter finishes.
+ * After all such observers have called this method, the reference count reaches 0.
+ * Then, EventWriter writes the profiling result files.
+ */
+ void readyToFlush(std::unique_ptr<EventRecorder> &&recorder);
+
+private:
+ EventWriter(const std::string &filepath) : _ref_count(0)
+ {
+ std::string snpe_log_name(filepath);
+ std::string chrome_tracing_log_name(filepath + ".chrome.json");
+ std::string md_table_log_name(filepath + ".table.md");
+
+ _actual_writers[WriteFormat::SNPE_BENCHMARK] = std::make_unique<SNPEWriter>(snpe_log_name);
+ _actual_writers[WriteFormat::CHROME_TRACING] =
+ std::make_unique<ChromeTracingWriter>(chrome_tracing_log_name);
+ _actual_writers[WriteFormat::MD_TABLE] = std::make_unique<MDTableWriter>(md_table_log_name);
+ };
+
+ void flush(WriteFormat write_format);
+
+private:
+ static std::mutex _mutex;
+
+ // number of observer of an executor that want to write profiling data
+ int32_t _ref_count;
+
+ // one recorder object per executor
+ std::vector<std::unique_ptr<EventRecorder>> _recorders;
+
+ std::unordered_map<WriteFormat, std::unique_ptr<EventFormatWriter>> _actual_writers;
+};
+
+#endif // __ONERT_UTIL_EVENT_WRITER_H__
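
A hedged usage sketch (assumed caller, not in this patch) of the reference-counted lifecycle declared above: get() the singleton, startToUse() once per observer, then hand over the per-executor recorder with readyToFlush():

#include <memory>
#include <string>
// assumes EventWriter.h (and thus EventRecorder.h) is included

void traceOneExecution(const std::string &filepath)
{
  auto *writer = EventWriter::get(filepath); // lazily constructed singleton
  writer->startToUse();                      // one reference per observer

  auto recorder = std::make_unique<EventRecorder>();
  // ... emit duration/counter events into *recorder during execution ...

  // When the last observer calls this, the SNPE JSON, Chrome tracing, and
  // MD table files are all flushed at once.
  writer->readyToFlush(std::move(recorder));
}
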
diff --git a/runtime/onert/core/src/util/EnvConfigSource.cc b/runtime/onert/core/src/util/Index.test.cc
index 0d25b7353..ff73e5e59 100644
--- a/runtime/onert/core/src/util/EnvConfigSource.cc
+++ b/runtime/onert/core/src/util/Index.test.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,27 +14,21 @@
* limitations under the License.
*/
-#include "util/EnvConfigSource.h"
+#include "util/Index.h"
-#include <cstdlib>
+#include <gtest/gtest.h>
-namespace onert
-{
-namespace util
-{
+using Index = ::onert::util::Index<uint32_t, struct TestTag>;
-std::string EnvConfigSource::get(const std::string &key) const
+TEST(Index, neg_index_test)
{
- const char *value = std::getenv(key.c_str());
- if (value != nullptr)
- {
- return value;
- }
- else
- {
- return GeneralConfigSource::get(key);
- }
-}
+ Index idx1{1u};
+ Index idx2{2u};
+ Index idx3{idx1};
-} // namespace util
-} // namespace onert
+ ASSERT_EQ(idx1, 1);
+ ASSERT_EQ(idx1, 1u);
+ ASSERT_EQ(idx1.value(), 1u);
+ ASSERT_NE(idx1, idx2);
+ ASSERT_EQ(idx1, idx3);
+}
diff --git a/runtime/onert/core/src/util/MDTableEventWriter.cc b/runtime/onert/core/src/util/MDTableEventWriter.cc
new file mode 100644
index 000000000..e7d90eec4
--- /dev/null
+++ b/runtime/onert/core/src/util/MDTableEventWriter.cc
@@ -0,0 +1,365 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "EventWriter.h"
+
+#include <cassert>
+#include <map>
+#include <set>
+#include <sstream>
+#include <stdint.h>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+// md table type
+namespace
+{
+
+void writeMDTableRow(std::ostream &os, const std::vector<std::string> &list)
+{
+ os << "| ";
+ for (const auto &key : list)
+ {
+ os << key << " | ";
+ }
+ os << "\n";
+}
+
+struct MDContent
+{
+ std::string name;
+ uint64_t begin_ts;
+ uint64_t end_ts;
+ uint32_t min_rss;
+ uint32_t max_rss;
+ uint32_t min_page_reclaims;
+ uint32_t max_page_reclaims;
+
+ MDContent()
+ : begin_ts(0), end_ts(0), min_rss(UINT32_MAX), max_rss(0), min_page_reclaims(UINT32_MAX),
+ max_page_reclaims(0)
+ {
+ // DO NOTHING
+ }
+
+ virtual ~MDContent() = default;
+
+ void updateRss(uint32_t rss)
+ {
+ if (min_rss == UINT32_MAX)
+ min_rss = rss;
+ if (max_rss == 0)
+ max_rss = rss;
+
+ if (min_rss > rss)
+ min_rss = rss;
+ else if (max_rss < rss)
+ max_rss = rss;
+ }
+
+ void updateMinflt(uint32_t minflt)
+ {
+ if (min_page_reclaims == UINT32_MAX)
+ min_page_reclaims = minflt;
+ if (max_page_reclaims == 0)
+ max_page_reclaims = minflt;
+
+ if (min_page_reclaims > minflt)
+ min_page_reclaims = minflt;
+ else if (max_page_reclaims < minflt)
+ max_page_reclaims = minflt;
+ }
+
+ virtual void write(std::ostream &os) const = 0;
+};
+
+struct Operation : public MDContent
+{
+ std::string backend;
+ uint64_t graph_latency;
+
+ struct OperationCmp
+ {
+ bool operator()(const Operation &lhs, const Operation &rhs) const
+ {
+ return lhs.begin_ts < rhs.begin_ts;
+ }
+ bool operator()(const Operation &lhs, const Operation &rhs)
+ {
+ return lhs.begin_ts < rhs.begin_ts;
+ }
+ bool operator()(Operation &lhs, Operation &rhs) { return lhs.begin_ts < rhs.begin_ts; }
+ };
+
+ void write(std::ostream &os) const override
+ {
+ uint64_t op_latency = end_ts - begin_ts;
+ double op_per = static_cast<double>(op_latency) / graph_latency * 100.0;
+ writeMDTableRow(os, {name, backend, std::to_string(op_latency), std::to_string(op_per),
+ std::to_string(min_rss), std::to_string(max_rss),
+ std::to_string(min_page_reclaims), std::to_string(max_page_reclaims)});
+ }
+};
+
+struct Graph : public MDContent
+{
+ std::set<Operation, Operation::OperationCmp> ops;
+ std::string session_index;
+ std::string subgraph_index;
+
+ void setOperations(const std::map<std::string, Operation> &name_to_op)
+ {
+ uint64_t graph_latency = end_ts - begin_ts;
+ for (auto &&it : name_to_op)
+ {
+ auto op = it.second;
+ op.graph_latency = graph_latency;
+
+ ops.insert(op);
+
+ updateRss(op.min_rss);
+ updateRss(op.max_rss);
+ updateMinflt(op.min_page_reclaims);
+ updateMinflt(op.max_page_reclaims);
+ }
+ }
+
+ void write(std::ostream &os) const override
+ {
+ static std::vector<std::string> graph_headers{"latency(us)", "rss_min(kb)", "rss_max(kb)",
+ "page_reclaims_min", "page_reclaims_max"};
+
+ static std::vector<std::string> graph_headers_line{"-----------", "-------", "-------",
+ "-----------------", "-----------------"};
+
+ // Graph's Header
+ writeMDTableRow(os, graph_headers);
+ writeMDTableRow(os, graph_headers_line);
+
+ // Graph's contents
+ writeMDTableRow(os, {std::to_string(end_ts - begin_ts), std::to_string(min_rss),
+ std::to_string(max_rss), std::to_string(min_page_reclaims),
+ std::to_string(max_page_reclaims)});
+
+ os << "\n";
+
+ static std::vector<std::string> op_headers{
+ "Op name", "backend", "latency(us)", "latency(%)",
+ "rss_min(kb)", "rss_max(kb)", "page_reclaims_min", "page_reclaims_max"};
+
+ static std::vector<std::string> op_headers_line{
+ "-------", "-------", "-----------", "-----------",
+ "-------", "-------", "-----------------", "-----------------"};
+
+ os << "## Op \n";
+
+ // Operation's Header
+ writeMDTableRow(os, op_headers);
+ writeMDTableRow(os, op_headers_line);
+
+ // Operation's contents
+ for (auto &&op : ops)
+ {
+ op.write(os);
+ }
+
+ os << "\n";
+ }
+};
+
+std::string getLabel(const OpSeqDurationEvent &evt)
+{
+ std::string subg_label("$" + std::to_string(evt.subg_index) + " subgraph");
+ std::string op_label("@" + std::to_string(evt.op_index) + " " + evt.op_name);
+
+ return subg_label + " " + op_label;
+}
+
+struct MDTableBuilder
+{
+ MDTableBuilder(const std::vector<std::unique_ptr<DurationEvent>> &duration_events,
+ const std::vector<CounterEvent> &counter_events)
+ : _duration_events(duration_events), _counter_events(counter_events)
+ {
+// Enabled only in DEBUG builds until this can be done with low overhead in release builds
+#ifdef DEBUG
+ for (const auto &evt : _counter_events)
+ {
+ uint64_t ts = std::stoull(evt.ts);
+ auto &name = evt.name;
+ assert(name.compare("maxrss") == 0 || name.compare("minflt") == 0);
+ assert(evt.values.size() == 1);
+ auto &val = evt.values.begin()->second;
+ if (_ts_to_values.find(ts) == _ts_to_values.end())
+ {
+ std::pair<uint32_t, uint32_t> values;
+ if (name.compare("maxrss") == 0)
+ values.first = std::stoul(val);
+ else
+ values.second = std::stoul(val);
+ _ts_to_values.insert({ts, values});
+ }
+ else
+ {
+ auto &values = _ts_to_values.at(ts);
+ if (name.compare("maxrss") == 0)
+ values.first = std::stoul(val);
+ else
+ values.second = std::stoul(val);
+ }
+ }
+#endif
+ }
+
+ MDTableBuilder &build()
+ {
+ for (const auto &it : divideGraph())
+ {
+ size_t begin_idx = it.first;
+ size_t end_idx = it.second;
+ std::map<std::string, Operation> name_to_op;
+ for (size_t i = begin_idx + 1; i < end_idx; ++i)
+ {
+ const auto *evt = dynamic_cast<const OpSeqDurationEvent *>(_duration_events[i].get());
+ if (evt == nullptr)
+ continue;
+
+ const std::string evt_name = getLabel(*evt);
+ assert(evt->ph.compare("B") == 0 || evt->ph.compare("E") == 0);
+ if (evt->ph.compare("B") == 0)
+ {
+ assert(name_to_op.find(evt_name) == name_to_op.end());
+ name_to_op.insert({evt_name, makeOperation(*evt)});
+ }
+ else
+ {
+ assert(name_to_op.find(evt_name) != name_to_op.end());
+ auto &op = name_to_op.at(evt_name);
+ updateOperation(op, *evt);
+ }
+ }
+
+ _graphs.emplace_back(makeGraph(begin_idx, end_idx, name_to_op));
+ }
+
+ return *this;
+ }
+
+ std::vector<std::pair<size_t, size_t>> divideGraph()
+ {
+ std::vector<std::pair<size_t, size_t>> graph_idx_list; // pair<begin_idx, end_idx>
+ for (size_t i = 0, begin_idx = 0; i < _duration_events.size(); ++i)
+ {
+ const auto subg_evt = dynamic_cast<const SubgDurationEvent *>(_duration_events.at(i).get());
+ if (subg_evt == nullptr)
+ continue;
+
+ if (subg_evt->ph.compare("B") == 0)
+ begin_idx = i;
+ else
+ graph_idx_list.emplace_back(begin_idx, i);
+ }
+ return graph_idx_list;
+ }
+
+ Operation makeOperation(const OpSeqDurationEvent &evt)
+ {
+ Operation op;
+ const std::string &evt_name = getLabel(evt);
+ op.name = evt_name;
+ op.begin_ts = std::stoull(evt.ts);
+ op.backend = evt.backend;
+#ifdef DEBUG
+ op.updateRss(_ts_to_values.at(op.begin_ts).first);
+ op.updateMinflt(_ts_to_values.at(op.begin_ts).second);
+#else
+ op.updateRss(0);
+ op.updateMinflt(0);
+#endif
+ return op;
+ }
+
+ void updateOperation(Operation &op, const DurationEvent &evt)
+ {
+ op.end_ts = std::stoull(evt.ts);
+#ifdef DEBUG
+ op.updateRss(_ts_to_values.at(op.end_ts).first);
+ op.updateMinflt(_ts_to_values.at(op.end_ts).second);
+#else
+ op.updateRss(0);
+ op.updateMinflt(0);
+#endif
+ }
+
+ Graph makeGraph(size_t begin_idx, size_t end_idx,
+ const std::map<std::string, Operation> &name_to_op)
+ {
+ Graph graph;
+ graph.name = "Subgraph";
+ graph.begin_ts = std::stoull(_duration_events[begin_idx]->ts);
+ graph.end_ts = std::stoull(_duration_events[end_idx]->ts);
+ graph.setOperations(name_to_op);
+
+ for (const auto &arg : _duration_events[end_idx]->args)
+ {
+ if (arg.first == "session")
+ graph.session_index = arg.second;
+ if (arg.first == "subgraph")
+ graph.subgraph_index = arg.second;
+ }
+
+#ifdef DEBUG
+ graph.updateRss(_ts_to_values.at(graph.begin_ts).first);
+ graph.updateMinflt(_ts_to_values.at(graph.begin_ts).second);
+ graph.updateRss(_ts_to_values.at(graph.end_ts).first);
+ graph.updateMinflt(_ts_to_values.at(graph.end_ts).second);
+#else
+ graph.updateRss(0);
+ graph.updateMinflt(0);
+#endif
+ return graph;
+ }
+
+ void write(std::ostream &os)
+ {
+ // Write contents
+ for (size_t i = 0; i < _graphs.size(); ++i)
+ {
+ auto &graph = _graphs.at(i);
+ os << "# Session: " << graph.session_index << ", Subgraph: " << graph.subgraph_index
+ << ", Running count: " << i << "\n";
+ _graphs.at(i).write(os);
+ }
+ }
+
+ const std::vector<std::unique_ptr<DurationEvent>> &_duration_events;
+ const std::vector<CounterEvent> &_counter_events;
+
+ // timestamp to std::pair<maxrss, minflt>
+ std::unordered_map<uint64_t, std::pair<uint32_t, uint32_t>> _ts_to_values;
+ std::vector<Graph> _graphs;
+};
+
+} // namespace
+
+void MDTableWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &records)
+{
+ for (const auto &recorder : records)
+ {
+ MDTableBuilder(recorder->duration_events(), recorder->counter_events()).build().write(_os);
+ }
+}
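
For reference, a standalone sketch (it mirrors the anonymous-namespace helper above; the sample label and numbers are made up) of the row format the MD table writer emits:

#include <iostream>
#include <string>
#include <vector>

// Each row is printed as "| col1 | col2 | ... | " followed by a newline.
void writeRow(std::ostream &os, const std::vector<std::string> &cells)
{
  os << "| ";
  for (const auto &cell : cells)
    os << cell << " | ";
  os << "\n";
}

int main()
{
  writeRow(std::cout, {"Op name", "backend", "latency(us)"});
  writeRow(std::cout, {"-------", "-------", "-----------"});
  writeRow(std::cout, {"$0 subgraph @3 Conv2D", "cpu", "42"});
  // Output:
  // | Op name | backend | latency(us) |
  // | ------- | ------- | ----------- |
  // | $0 subgraph @3 Conv2D | cpu | 42 |
}
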
diff --git a/runtime/onert/core/src/util/ObjectManager.test.cc b/runtime/onert/core/src/util/ObjectManager.test.cc
new file mode 100644
index 000000000..3fe735732
--- /dev/null
+++ b/runtime/onert/core/src/util/ObjectManager.test.cc
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/Index.h"
+#include "util/ObjectManager.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert;
+
+struct TestTag;
+using Index = typename util::Index<uint32_t, TestTag>;
+
+TEST(ObjectManager, emplace)
+{
+ util::ObjectManager<Index, int> man;
+
+ auto index = man.emplace(100);
+ ASSERT_EQ(man.at(index), 100);
+}
+
+TEST(ObjectManager, neg_remove_1)
+{
+ util::ObjectManager<Index, int> man;
+
+ Index index = man.emplace(100);
+ ASSERT_TRUE(man.exist(index));
+ ASSERT_EQ(man.at(index), 100);
+
+ man.remove(index);
+ ASSERT_FALSE(man.exist(index));
+}
+
+TEST(ObjectManager, neg_remove_2)
+{
+ util::ObjectManager<Index, int> man;
+
+ auto index0 = man.emplace(100);
+ auto index1 = man.emplace(200);
+ ASSERT_TRUE(man.exist(index0));
+ ASSERT_EQ(man.at(index0), 100);
+ ASSERT_TRUE(man.exist(index1));
+ ASSERT_EQ(man.at(index1), 200);
+
+ man.remove(index0);
+ ASSERT_FALSE(man.exist(index0));
+ ASSERT_TRUE(man.exist(index1));
+ ASSERT_EQ(man.at(index1), 200);
+}
+
+TEST(ObjectManager, push)
+{
+ util::ObjectManager<Index, int> man;
+
+ // Not specify index
+ auto index = man.push(std::make_unique<int>(100));
+ ASSERT_EQ(man.at(index), 100);
+
+ // Specify index
+ auto index2 = man.push(std::make_unique<int>(200), Index{33});
+ ASSERT_EQ(index2.value(), 33);
+ ASSERT_EQ(man.at(index2), 200);
+
+ auto index3 = man.push(std::make_unique<int>(300));
+ // NOTE auto-generated index number is always (biggest index in the ObjectManager + 1)
+ ASSERT_EQ(index3.value(), 34);
+ ASSERT_EQ(man.at(index3), 300);
+
+ auto index4 = man.push(std::make_unique<int>(400), Index{22});
+ ASSERT_EQ(index4.value(), 22);
+ ASSERT_EQ(man.at(index4), 400);
+
+ auto index5 = man.push(std::make_unique<int>(500));
+ // NOTE auto-generated index number is always (biggest index in the ObjectManager + 1)
+ ASSERT_EQ(index5.value(), 35);
+ ASSERT_EQ(man.at(index5), 500);
+}
+
+TEST(ObjectManager, neg_push)
+{
+ util::ObjectManager<Index, int> man;
+
+ // Specify index
+ auto index = man.push(std::make_unique<int>(100), Index{55});
+ ASSERT_EQ(index.value(), 55);
+ ASSERT_EQ(man.at(index), 100);
+
+ // Specify the same index
+ auto index2 = man.push(std::make_unique<int>(200), Index{55});
+ ASSERT_FALSE(index2.valid());
+}
+
+static const uint32_t kMaxUInt32 = std::numeric_limits<uint32_t>::max();
+
+TEST(ObjectManager, neg_push_undefined_index)
+{
+ util::ObjectManager<Index, int> man;
+
+ // Try inserting invalid(undefined) index
+ auto index = man.push(std::make_unique<int>(100), Index{kMaxUInt32});
+ ASSERT_FALSE(index.valid());
+ ASSERT_EQ(man.size(), 0);
+}
+
+TEST(ObjectManager, neg_push_max_index)
+{
+ util::ObjectManager<Index, int> man;
+
+ // Insert an object with maximum valid index
+ auto index = man.push(std::make_unique<int>(100), Index{kMaxUInt32 - 1});
+ ASSERT_EQ(index.value(), kMaxUInt32 - 1);
+ ASSERT_EQ(man.at(index), 100);
+ ASSERT_EQ(man.size(), 1);
+
+ // Reached the final index, so the next push/emplace must fail
+ auto index2 = man.push(std::make_unique<int>(200));
+ ASSERT_EQ(man.size(), 1);
+ ASSERT_FALSE(index2.valid());
+}
+
+TEST(ObjectManager, neg_emplace_max_index)
+{
+ util::ObjectManager<Index, int> man;
+
+ // Insert an object with maximum valid index
+ auto index = man.push(std::make_unique<int>(100), Index{kMaxUInt32 - 1});
+ ASSERT_EQ(index.value(), kMaxUInt32 - 1);
+ ASSERT_EQ(man.at(index), 100);
+ ASSERT_EQ(man.size(), 1);
+
+ // Reached the final index, so the next push/emplace must fail
+ auto index3 = man.emplace(200);
+ ASSERT_EQ(man.size(), 1);
+ ASSERT_FALSE(index3.valid());
+}
+
+TEST(ObjectManager, const_iterate)
+{
+ util::ObjectManager<Index, int> man;
+
+ auto index0 = man.emplace(100);
+ auto index1 = man.emplace(200);
+ auto index2 = man.emplace(300);
+
+ int sum = 0;
+ man.iterate([&](const Index &index, const int &val) { sum += val; });
+ ASSERT_EQ(sum, 600);
+}
+
+TEST(ObjectManager, non_const_iterate)
+{
+ util::ObjectManager<Index, int> man;
+
+ auto index0 = man.emplace(100);
+ auto index1 = man.emplace(200);
+ auto index2 = man.emplace(300);
+
+ man.iterate([&](const Index &index, int &val) { val += 1; });
+ ASSERT_EQ(man.at(index0), 101);
+ ASSERT_EQ(man.at(index1), 201);
+ ASSERT_EQ(man.at(index2), 301);
+}
+
+TEST(ObjectManager, set)
+{
+ util::ObjectManager<Index, int> man;
+ auto index = man.set(Index{1}, std::make_unique<int>(100)); // Insert
+ ASSERT_EQ(index, Index{1});
+ auto index2 = man.set(index, std::make_unique<int>(200)); // Overwrite
+ ASSERT_EQ(index2, index);
+ ASSERT_EQ(man.at(index2), 200);
+}
+
+TEST(ObjectManager, neg_set)
+{
+ auto v = std::make_unique<int>(100);
+ util::ObjectManager<Index, int> man;
+ auto index = man.set(Index{}, std::move(v)); // Try set with an invalid index
+ ASSERT_EQ(index, Index{});
+ ASSERT_FALSE(index.valid());
+ ASSERT_NE(v, nullptr); // v must be kept when failure
+}
+
+TEST(ObjectManager, getRawPtr)
+{
+ auto v = std::make_unique<int>(100);
+ auto v_ptr = v.get();
+ util::ObjectManager<Index, int> man;
+ auto index = man.push(std::move(v));
+ ASSERT_EQ(v_ptr, man.getRawPtr(index));
+}
+
+TEST(ObjectManager, neg_getRawPtr)
+{
+ util::ObjectManager<Index, int> man;
+ auto ptr = man.getRawPtr(Index{1});
+ ASSERT_EQ(ptr, nullptr);
+}
diff --git a/runtime/onert/core/src/util/SNPEEventWriter.cc b/runtime/onert/core/src/util/SNPEEventWriter.cc
new file mode 100644
index 000000000..87bbfc662
--- /dev/null
+++ b/runtime/onert/core/src/util/SNPEEventWriter.cc
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "EventWriter.h"
+
+#include <json/json.h>
+
+#include <cassert>
+#include <unordered_map>
+#include <utility>
+
+/**
+ * @brief Version of SNPE format
+ * In version 1
+ * - There is no "version" field in Json
+ * - Only one subgraph is supported
+ * - Operation name has the form "$3 ADD"
+ *
+ * In version 2,
+ * - "version" : "2" was added in Json
+ * - Multiple sessions and multiple subgraphs are supported
+ * - When there is only one session, an operation name has the form "$2 subgraph $3 ADD",
+ *   meaning an ADD op whose operation index is 3, in a subgraph whose index is 2
+ * - When there are two or more sessions, an operation name has the form
+ *   "$1 session $2 subgraph $3 ADD", meaning an ADD op whose operation index is 3,
+ *   in a subgraph whose index is 2, run in the 1st session.
+ */
+#define SNPE_JSON_SCHEMA_VERSION "2"
+
+namespace
+{
+
+std::string getLabel(const DurationEvent &evt)
+{
+ if (auto evt_ptr = dynamic_cast<const OpSeqDurationEvent *>(&evt))
+ {
+ std::string subg_label("$" + std::to_string(evt_ptr->subg_index) + " subgraph");
+ std::string op_label("$" + std::to_string(evt_ptr->op_index) + " " + evt_ptr->op_name);
+
+ // Note : At this moment, there is only one thread running for EventWriter
+ if (evt_ptr->tracing_ctx->hasMultipleSessions())
+ {
+ std::string session_label("$" + std::to_string(evt_ptr->session_index) + " session");
+ return session_label + " " + subg_label + " " + op_label;
+ }
+ else
+ {
+ // When there is only one session, do not include session info
+ // Refer to https://github.sec.samsung.net/STAR/nnfw/issues/11436#issuecomment-930332
+ return subg_label + " " + op_label;
+ }
+ }
+ else // SubgDurationEvent
+ return "Graph";
+}
+
+std::string getBackend(const DurationEvent &evt)
+{
+ if (auto evt_ptr = dynamic_cast<const OpSeqDurationEvent *>(&evt))
+ return evt_ptr->backend;
+ else // SubgDurationEvent
+ return "runtime";
+}
+
+} // namespace
+
+void SNPEWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &recorders)
+{
+ struct Stat
+ {
+ uint64_t sum = 0;
+ uint64_t count = 0;
+ uint64_t max = 0;
+ uint64_t min = std::numeric_limits<uint64_t>::max();
+
+ void accumulate(uint64_t val)
+ {
+ sum += val;
+ count++;
+ max = std::max(max, val);
+ min = std::min(min, val);
+ }
+ };
+
+ Json::Value root;
+ root["version"] = SNPE_JSON_SCHEMA_VERSION;
+
+ auto &exec_data = root["Execution_Data"] = Json::Value{Json::objectValue};
+
+ // Memory
+ {
+ std::unordered_map<std::string, Stat> mem_stats;
+ for (const auto &recorder : recorders)
+ {
+ for (const auto &evt : recorder->counter_events())
+ {
+ auto &mem_stat = mem_stats[evt.name];
+ uint64_t val = std::stoull(evt.values.at("value"));
+ mem_stat.accumulate(val);
+ }
+ }
+
+ auto &mem = exec_data["memory"] = Json::Value{Json::objectValue};
+ for (const auto &kv : mem_stats)
+ {
+ auto &key = kv.first;
+ auto &val = kv.second;
+ mem[key]["Avg_Size"] = val.sum / val.count;
+ mem[key]["Max_Size"] = val.max;
+ mem[key]["Min_Size"] = val.min;
+ mem[key]["Runtime"] = "NA";
+ }
+ }
+
+ // Operation Execution Time
+ {
+ // NOTE This assumes each recorder's duration events are sorted by "ts" ascending
+
+ // 2D keys : stats[tid][name]
+ std::unordered_map<std::string, std::unordered_map<std::string, Stat>> stats;
+ std::unordered_map<std::string, std::unordered_map<std::string, uint64_t>> begin_timestamps;
+ for (const auto &recorder : recorders)
+ {
+ for (const auto &evt : recorder->duration_events())
+ {
+ std::string evt_name = getLabel(*evt);
+ std::string evt_tid = getBackend(*evt);
+
+ auto &stat = stats[evt_tid][evt_name];
+ auto &begin_ts = begin_timestamps[evt_tid][evt_name];
+ uint64_t timestamp = std::stoull(evt->ts);
+ if (evt->ph == "B")
+ {
+ if (begin_ts != 0)
+ throw std::runtime_error{"Invalid Data"};
+ begin_ts = timestamp;
+ }
+ else if (evt->ph == "E")
+ {
+ if (begin_ts == 0 || timestamp < begin_ts)
+ throw std::runtime_error{"Invalid Data"};
+ stat.accumulate(timestamp - begin_ts);
+ begin_ts = 0;
+ }
+ else
+ throw std::runtime_error{"Invalid Data - invalid value for \"ph\" : \"" + evt->ph + "\""};
+ }
+ }
+
+ for (const auto &kv : begin_timestamps)
+ for (const auto &kv2 : kv.second)
+ if (kv2.second != 0)
+ throw std::runtime_error{"Invalid Data - B and E pair does not match."};
+
+ for (const auto &kv : stats)
+ {
+ const auto &tid = kv.first;
+ const auto &map = kv.second;
+ auto &json_tid = exec_data[tid] = Json::Value{Json::objectValue};
+ for (const auto &kv : map)
+ {
+ auto &name = kv.first;
+ auto &val = kv.second;
+ json_tid[name]["Avg_Time"] = val.sum / val.count;
+ json_tid[name]["Max_Time"] = val.max;
+ json_tid[name]["Min_Time"] = val.min;
+ json_tid[name]["Runtime"] = tid;
+ }
+ }
+ }
+
+ _os << root;
+}
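
A small self-contained illustration (an assumption, not code from the patch) of the version-2 label scheme documented at the top of this file:

#include <cstdint>
#include <iostream>
#include <string>

// Builds "$<subg> subgraph $<op> <name>", optionally prefixed with "$<session> session".
std::string makeLabel(uint32_t session, uint32_t subg, uint32_t op,
                      const std::string &op_name, bool multi_session)
{
  std::string label =
    "$" + std::to_string(subg) + " subgraph $" + std::to_string(op) + " " + op_name;
  if (multi_session)
    label = "$" + std::to_string(session) + " session " + label;
  return label;
}

int main()
{
  std::cout << makeLabel(1, 2, 3, "ADD", false) << "\n"; // $2 subgraph $3 ADD
  std::cout << makeLabel(1, 2, 3, "ADD", true) << "\n";  // $1 session $2 subgraph $3 ADD
}
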
diff --git a/runtime/onert/core/src/util/ShapeInference.cc b/runtime/onert/core/src/util/ShapeInference.cc
index 95c15049d..862d6f725 100644
--- a/runtime/onert/core/src/util/ShapeInference.cc
+++ b/runtime/onert/core/src/util/ShapeInference.cc
@@ -22,6 +22,7 @@
#include "util/logging.h"
#include <cassert>
+#include <numeric>
#include <sstream>
#include <cmath>
@@ -72,6 +73,19 @@ ir::Shape broadcastShapes(const ir::Shape &lhs_shape, const ir::Shape &rhs_shape
} // namespace
+namespace bcq
+{
+inline int getOutputSize(const ir::Shape &cluster_shape, const int32_t *cluster_buf)
+{
+ int size = 0;
+ for (int idx = 0; idx < cluster_shape.dim(0); idx++)
+ {
+ size += cluster_buf[idx * 2 + 1];
+ }
+ return size;
+}
+} // namespace bcq
+
//
// Shape inference
//
@@ -97,10 +111,9 @@ std::pair<int, int> calcConvLikeHeightAndWidth(const int in_h, const int in_w, c
break;
case ir::PaddingType::EXPLICIT:
out_h =
- (in_h + pad.param.top + pad.param.bottom - effective_filter_h_size) / stride.vertical + 1;
+ (in_h + pad.param.top + pad.param.bottom - effective_filter_h_size) / stride.vertical + 1;
out_w =
- (in_w + pad.param.left + pad.param.right - effective_filter_w_size) / stride.horizontal +
- 1;
+ (in_w + pad.param.left + pad.param.right - effective_filter_w_size) / stride.horizontal + 1;
break;
default:
assert(false);
@@ -114,8 +127,13 @@ ir::Shape inferEltwiseShape(const ir::Shape &lhs_shape, const ir::Shape &rhs_sha
return broadcastShapes(lhs_shape, rhs_shape);
}
-ir::Shape inferArgMaxShape(const ir::Shape &input_shape, int axis, int rank)
+ir::Shape inferArgMinMaxShape(const ir::Shape &input_shape, int axis, int rank)
{
+ if (axis < 0 || axis >= rank)
+ {
+ throw std::runtime_error("ArgMinMax shape inference: Wrong axis value " + std::to_string(axis));
+ }
+
ir::Shape out_shape;
for (int idx = 0; idx < rank; ++idx)
{
@@ -171,11 +189,12 @@ ir::Shape inferReduceShape(const ir::Shape &input_shape, const std::vector<int>
for (int i = 0; i < num_axis; ++i)
{
int current = axes[i];
+ if (!(-input_num_dims <= current && current < input_num_dims))
+ throw std::runtime_error{"Invalid dim value " + std::to_string(current)};
if (current < 0)
{
current += input_num_dims;
}
- assert(0 <= current && current < input_num_dims);
for (int j = 0; j < i; ++j)
{
int previous = axes[j];
@@ -259,19 +278,24 @@ ir::Shape inferBatchMatMulShape(const ir::Shape &lhs_shape, const ir::Shape &rhs
return output_shape;
}
-ir::Shape inferBroadcastToShape(const ir::Shape wshape, const int32_t *shape_buffer)
+/*
+ * shp_shape : SHAPE input tensor's shape
+ * shp_buf : SHAPE input tensor's buffer
+ */
+ir::Shape inferBroadcastToShape(const ir::Shape shp_shape, const int32_t *shp_buf)
{
- const int num_elements = wshape.num_elements();
+
+ const int num_elements = shp_shape.num_elements();
assert(num_elements != 0);
- assert(shape_buffer);
+ assert(shp_buf);
ir::Shape new_shape(num_elements);
for (int i = 0; i < num_elements; ++i)
{
- assert(shape_buffer[i] != 0); // It shouldn't be 0.
- new_shape.dim(i) = shape_buffer[i];
+ assert(shp_buf[i] != 0); // It shouldn't be 0.
+ new_shape.dim(i) = shp_buf[i];
}
return new_shape;
@@ -305,6 +329,9 @@ ir::Shape inferConcatShape(const Shapes &in_shapes, const ir::operation::Concat:
ir::Shape inferConv2DShape(const ir::Shape &in_shape, const ir::Shape &ker_shape,
const ir::operation::Conv2D::Param &param, ir::Layout layout)
{
+ if (param.stride.horizontal == 0 || param.stride.vertical == 0)
+ throw std::runtime_error{"Conv2D: stride values must be positive"};
+
auto ifm_shape = in_shape.asFeature(layout);
// Kernel format is [depth_out, kernel_height, kernel_width, depth_in]
@@ -321,6 +348,9 @@ ir::Shape inferDepthwiseConv2DShape(const ir::Shape &in_shape, const ir::Shape &
const ir::operation::DepthwiseConv2D::Param &param,
ir::Layout layout)
{
+ if (param.stride.horizontal == 0 || param.stride.vertical == 0)
+ throw std::runtime_error{"DepthwiseConv2D: stride values must be positive"};
+
assert(layout == ir::Layout::NHWC);
auto ifm_shape = in_shape.asFeature(layout);
@@ -330,7 +360,7 @@ ir::Shape inferDepthwiseConv2DShape(const ir::Shape &in_shape, const ir::Shape &
assert(kf_shape.N == 1);
const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, kf_shape.H, kf_shape.W,
- param.padding, param.stride);
+ param.padding, param.stride, param.dilation);
return ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, kf_shape.C};
}
@@ -354,18 +384,22 @@ ir::Shape inferExpandDimsShape(const ir::Shape &in_shape, int32_t axis)
return out_shape;
}
-ir::Shape inferFillShape(const ir::Shape &in_shape, const int32_t *buffer)
+template <typename T> ir::Shape inferFillShape(const ir::Shape &fill_shape, const T *shape_buf)
{
- ir::Shape out_shape(in_shape.dim(0));
+ ir::Shape out_shape(fill_shape.dim(0));
for (int out_x = 0; out_x < out_shape.rank(); ++out_x)
{
- out_shape.dim(out_x) = buffer[out_x];
+ out_shape.dim(out_x) = static_cast<int32_t>(shape_buf[out_x]);
}
return out_shape;
}
+// template instantiation
+template ir::Shape inferFillShape(const ir::Shape &fill_shape, const int32_t *shape_buf);
+template ir::Shape inferFillShape(const ir::Shape &fill_shape, const int64_t *shape_buf);
+
ir::Shape inferFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &ker_shape)
{
assert(in_shape.rank() >= 2);
@@ -380,11 +414,60 @@ ir::Shape inferFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &k
return {ir::Shape({static_cast<int32_t>(batch_size), num_units})};
}
+ir::Shape inferBCQFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &cluster_shape,
+ const int32_t *cluster_buf)
+{
+ assert(cluster_shape.rank() == 2);
+ assert(cluster_shape.dim(1) == 2);
+
+ const auto input_size = in_shape.dim(1);
+ const auto output_size = bcq::getOutputSize(cluster_shape, cluster_buf);
+
+ return {ir::Shape({output_size, input_size})};
+}
+
+ir::Shape inferBCQGatherShape(const ir::Shape &indices_shape, const ir::Shape &cluster_shape,
+ const int32_t *cluster_buf, int rank,
+ const ir::operation::BCQGather::Param &param)
+{
+ ir::Shape out_shape;
+ ir::Shape in_original_shape;
+
+ assert(cluster_shape.rank() == 2);
+ assert(cluster_shape.dim(1) == 2);
+
+ auto hidden_size = param.input_hidden_size;
+ auto axis = param.axis;
+
+ in_original_shape.append(bcq::getOutputSize(cluster_shape, cluster_buf));
+ in_original_shape.append(hidden_size);
+
+ const int indices_rank = indices_shape.rank();
+ for (int idx = 0; idx < rank; ++idx)
+ {
+ if (idx == (int)axis)
+ {
+ for (int indices_idx = 0; indices_idx < indices_rank; indices_idx++)
+ {
+ out_shape.append(indices_shape.dim(indices_idx));
+ }
+ }
+ else
+ {
+ out_shape.append(in_original_shape.dim(idx));
+ }
+ }
+
+ return out_shape;
+}
+
ir::Shape inferGatherShape(const ir::Shape &input_shape, const ir::Shape &indices_shape, int axis,
int rank)
{
ir::Shape out_shape;
+
const int indices_rank = indices_shape.rank();
+
for (int idx = 0; idx < rank; ++idx)
{
if (idx == axis)
@@ -470,6 +553,9 @@ ir::Shape inferPadShape(const ir::Shape &in_shape, const int32_t *pad_buf, const
ir::Shape inferPoolShape(const ir::Shape &in_shape, const ir::operation::Pool2D::Param &param,
const ir::Layout layout)
{
+ if (param.stride.horizontal == 0 || param.stride.vertical == 0)
+ throw std::runtime_error{"Pool2D: stride values must be positive"};
+
assert(layout == ir::Layout::NHWC);
auto ifm_shape = in_shape.asFeature(layout);
const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, param.kh, param.kw,
@@ -482,6 +568,17 @@ ir::Shape inferResizeBilinearShape(const ir::Shape &in_shape, const int32_t outp
const int32_t output_width)
{
assert(in_shape.rank() == 4);
+ if (output_height < 0)
+ {
+ throw std::runtime_error{"ResizeBilinear: size value must be positive value, output_height = " +
+ std::to_string(output_height)};
+ }
+ if (output_width < 0)
+ {
+ throw std::runtime_error{"ResizeBilinear: size value must be positive value, output_width = " +
+ std::to_string(output_width)};
+ }
+
ir::Shape ret(in_shape.rank());
ret.dim(0) = in_shape.dim(0);
@@ -497,9 +594,9 @@ template <typename T> ir::Shape inferRangeShape(T start_val, T limit_val, T delt
ir::Shape out_shape(static_cast<int>(1));
out_shape.dim(0) =
- (std::is_integral<T>::value
- ? ((std::abs(start_val - limit_val) + std::abs(delta_val) - 1) / std::abs(delta_val))
- : std::ceil(std::abs((start_val - limit_val) / delta_val)));
+ (std::is_integral<T>::value
+ ? ((std::abs(start_val - limit_val) + std::abs(delta_val) - 1) / std::abs(delta_val))
+ : std::ceil(std::abs((start_val - limit_val) / delta_val)));
return out_shape;
}
@@ -511,12 +608,12 @@ ir::Shape inferReshapeShape(const int32_t *shape_buf, const int32_t shape_num_el
const size_t total_num_elements)
{
ir::Shape ret(shape_num_elements);
- int32_t flatten_dim = ir::Shape::UNSPECIFIED_DIM;
+ int32_t flatten_dim = ir::Shape::kUnspecifiedDim;
for (int32_t i = 0; i < shape_num_elements; ++i)
{
if (shape_buf[i] < 0)
{
- if (flatten_dim != ir::Shape::UNSPECIFIED_DIM)
+ if (flatten_dim != ir::Shape::kUnspecifiedDim)
throw std::runtime_error("Reshape: 2nd param has special dim(for flatten) more than twice");
flatten_dim = i;
ret.dim(i) = 1;
@@ -526,7 +623,7 @@ ir::Shape inferReshapeShape(const int32_t *shape_buf, const int32_t shape_num_el
ret.dim(i) = shape_buf[i];
}
}
- if (flatten_dim != ir::Shape::UNSPECIFIED_DIM)
+ if (flatten_dim != ir::Shape::kUnspecifiedDim)
ret.dim(flatten_dim) = total_num_elements / ret.num_elements();
// Check reshapable
@@ -566,9 +663,9 @@ ir::Shape inferSelectShape(const ir::Shape &input_cond_shape, const ir::Shape &i
ir::Shape true_shape = input_true_shape;
ir::Shape false_shape = input_false_shape;
int most_rank =
- (cond_shape.rank() >= true_shape.rank()) && (cond_shape.rank() >= false_shape.rank())
- ? cond_shape.rank()
- : (false_shape.rank() >= true_shape.rank() ? false_shape.rank() : true_shape.rank());
+ (cond_shape.rank() >= true_shape.rank()) && (cond_shape.rank() >= false_shape.rank())
+ ? cond_shape.rank()
+ : (false_shape.rank() >= true_shape.rank() ? false_shape.rank() : true_shape.rank());
ir::Shape calculate_shape(most_rank);
@@ -579,9 +676,9 @@ ir::Shape inferSelectShape(const ir::Shape &input_cond_shape, const ir::Shape &i
for (int i = 0; i < most_rank; ++i)
{
calculate_shape.dim(i) =
- (cond_shape.dim(i) >= true_shape.dim(i)) && (cond_shape.dim(i) >= false_shape.dim(i))
- ? cond_shape.dim(i)
- : (false_shape.dim(i) >= true_shape.dim(i) ? false_shape.dim(i) : true_shape.dim(i));
+ (cond_shape.dim(i) >= true_shape.dim(i)) && (cond_shape.dim(i) >= false_shape.dim(i))
+ ? cond_shape.dim(i)
+ : (false_shape.dim(i) >= true_shape.dim(i) ? false_shape.dim(i) : true_shape.dim(i));
if ((cond_shape.dim(i) != calculate_shape.dim(i) && cond_shape.dim(i) != 1) ||
(true_shape.dim(i) != calculate_shape.dim(i) && true_shape.dim(i) != 1) ||
@@ -613,7 +710,8 @@ ir::Shape inferSelectShape(const ir::Shape &input_cond_shape, const ir::Shape &i
return new_shape;
}
-ir::Shape inferSliceShape(const ir::Shape &input_shape, const int32_t *begins, const int32_t *sizes)
+template <typename T>
+ir::Shape inferSliceShape(const ir::Shape &input_shape, const T *begins_buf, const T *sizes_buf)
{
const uint32_t rank = input_shape.rank();
ir::Shape out_shape(rank);
@@ -623,12 +721,12 @@ ir::Shape inferSliceShape(const ir::Shape &input_shape, const int32_t *begins, c
const auto input_dim = input_shape.dim(idx);
// begin is zero-based
- auto begin = begins[idx];
+ auto begin = begins_buf[idx];
if (begin < 0)
throw std::runtime_error("shape inference Slice: Invalid begin.");
// size is one-based
- auto size = sizes[idx];
+ auto size = sizes_buf[idx];
if (size < -1)
throw std::runtime_error("shape inference Slice: Invalid size.");
@@ -638,18 +736,23 @@ ir::Shape inferSliceShape(const ir::Shape &input_shape, const int32_t *begins, c
}
else
{
- if (input_dim < begin + size)
+ if (input_dim < static_cast<int32_t>(begin + size))
throw std::runtime_error("shape inference Slice: Invalid begin and size.");
}
- out_shape.dim(idx) = size;
+ out_shape.dim(idx) = static_cast<int32_t>(size);
}
return out_shape;
}
+// template instantiation
+template ir::Shape inferSliceShape(const ir::Shape &input_shape, const int32_t *begins_buf,
+ const int32_t *sizes_buf);
+template ir::Shape inferSliceShape(const ir::Shape &input_shape, const int64_t *begins_buf,
+ const int64_t *sizes_buf);
ir::Shape inferSpaceToBatchNDShape(const ir::Shape &input_shape, const ir::Shape &block_shape_shape,
- const ir::Shape &padding_shape, const int32_t *block_shape_data,
- const int32_t *padding_data)
+ const ir::Shape &padding_shape, const int32_t *block_shape_buf,
+ const int32_t *padding_buf)
{
const uint32_t rank = input_shape.rank();
ir::Shape out_shape(rank);
@@ -677,14 +780,14 @@ ir::Shape inferSpaceToBatchNDShape(const ir::Shape &input_shape, const ir::Shape
for (int dim = 0; dim < kSpatialDimensionNum; ++dim)
{
int final_dim_size =
- (input_shape.dim(dim + 1) + padding_data[dim * 2] + padding_data[dim * 2 + 1]);
+ (input_shape.dim(dim + 1) + padding_buf[dim * 2] + padding_buf[dim * 2 + 1]);
- assert(final_dim_size % block_shape_data[dim] == 0);
+ assert(final_dim_size % block_shape_buf[dim] == 0);
- out_shape.dim(dim + 1) = final_dim_size / block_shape_data[dim];
+ out_shape.dim(dim + 1) = final_dim_size / block_shape_buf[dim];
}
- const int output_batch_size = input_shape.dim(0) * block_shape_data[0] * block_shape_data[1];
+ const int output_batch_size = input_shape.dim(0) * block_shape_buf[0] * block_shape_buf[1];
const int output_channel_size = input_shape.dim(3);
out_shape.dim(0) = output_batch_size;
@@ -740,7 +843,7 @@ ir::Shape inferSqueezeShape(const ir::Shape &in_shape, const ir::operation::Sque
if (!(current >= 0 && current < shape_rank && in_shape.dim(current) == 1))
{
throw std::runtime_error(
- "The following conditions must be met: 0 <= dim < Shape rank, dim == 1");
+ "The following conditions must be met: 0 <= dim < Shape rank, dim == 1");
}
if (!should_squeeze[current])
@@ -948,35 +1051,71 @@ ir::Shape inferStridedSliceShape(const ir::Shape &input_shape, const StridedSlic
return out_shape;
}
-ir::Shape inferTileShape(const ir::Shape &in_shape, const int32_t *multiplier)
+ir::Shape inferTileShape(const ir::Shape &in_shape, const int32_t *multiplier_buf,
+ const int32_t multiplier_size)
{
- // assert(in_shape.rank() == multiplier.rank());
+ if (multiplier_size != in_shape.rank())
+ {
+ throw std::runtime_error(
+ "inferTileShape failed, input rank: " + std::to_string(in_shape.rank()) +
+ ", bad multipliers size: " + std::to_string(multiplier_size) + "");
+ }
ir::Shape new_Shape(in_shape.rank());
for (int i = 0; i < in_shape.rank(); ++i)
{
- assert(multiplier[i]); // multiplier[i] shuld not be 0.
- new_Shape.dim(i) = in_shape.dim(i) * multiplier[i];
+ assert(multiplier_buf[i]); // multiplier_buf[i] should not be 0.
+ new_Shape.dim(i) = in_shape.dim(i) * multiplier_buf[i];
}
return new_Shape;
}
-ir::Shape inferTransposeShape(const ir::Shape &in_shape, const std::vector<int> &perm)
+ir::Shape inferTransposeShape(const ir::Shape &in_shape, const int32_t *perm_buf,
+ const int32_t perm_size)
{
- if (static_cast<int>(perm.size()) > in_shape.rank())
+ const auto rank = in_shape.rank();
+ if (perm_size > rank)
{
- throw std::runtime_error("inferTransposeShape failed, bad rank size: " +
- std::to_string(static_cast<int>(perm.size())));
+ throw std::runtime_error("inferTransposeShape failed, bad permutation size: " +
+ std::to_string(perm_size));
}
- ir::Shape out_shape(static_cast<int>(perm.size()));
- for (int idx = 0; idx < static_cast<int>(perm.size()); idx++)
+
+ const int32_t *perm_data = perm_buf;
+ std::vector<int32_t> regular_perm_vec;
+ if (perm_size == 0)
+ {
+ // perm_data will be set to (n-1...0)
+ regular_perm_vec.resize(rank);
+ std::iota(regular_perm_vec.begin(), regular_perm_vec.end(), 0);
+ std::reverse(regular_perm_vec.begin(), regular_perm_vec.end());
+ perm_data = regular_perm_vec.data();
+ }
+ else
{
- if (perm[idx] < 0 || perm[idx] >= static_cast<int>(perm.size()))
+ assert(rank == perm_size);
+ }
+
+ ir::Shape out_shape(rank);
+ std::vector<bool> visit_perms(rank, false);
+ for (int idx = 0; idx < rank; idx++)
+ {
+ const auto perm_val = perm_data[idx];
+ // Check invalid permutation value
+ if (perm_val < 0 || perm_val >= rank)
{
- throw std::runtime_error("inferTransposeShape failed, bad perm value: " +
- std::to_string(perm[idx]));
+ throw std::runtime_error("inferTransposeShape failed, bad permutation value: " +
+ std::to_string(perm_val));
}
- out_shape.dim(idx) = in_shape.dim(perm[idx]);
+
+ // Check duplicated permutation value
+ if (visit_perms.at(perm_val))
+ {
+ throw std::runtime_error("inferTransposeShape failed, duplicated permutation value: " +
+ std::to_string(perm_val));
+ }
+ visit_perms.at(perm_val) = true;
+
+ out_shape.dim(idx) = in_shape.dim(perm_val);
}
return out_shape;
}
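
The perm_size == 0 branch is the one path not exercised by the new tests below; a minimal sketch of its behavior (illustrative only, not part of the patch): an empty permutation is treated as the reversing permutation (rank-1, ..., 0).

onert::ir::Shape in_shape{2, 3, 4};
// perm_buf is never dereferenced when perm_size == 0, so nullptr is fine here.
auto out = onert::shape_inference::inferTransposeShape(in_shape, nullptr, 0);
// out is {4, 3, 2}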
diff --git a/runtime/onert/core/src/util/ShapeInference.test.cc b/runtime/onert/core/src/util/ShapeInference.test.cc
new file mode 100644
index 000000000..96579bfa2
--- /dev/null
+++ b/runtime/onert/core/src/util/ShapeInference.test.cc
@@ -0,0 +1,544 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/ShapeInference.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert::ir;
+
+TEST(ShapeInference, Elementwise)
+{
+ Shape lhs_shape{1, 299, 299, 3};
+ Shape rhs_shape{3};
+ auto infered_out_shape = onert::shape_inference::inferEltwiseShape(lhs_shape, rhs_shape);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.dim(0), 1);
+ ASSERT_EQ(infered_out_shape.dim(1), 299);
+ ASSERT_EQ(infered_out_shape.dim(2), 299);
+ ASSERT_EQ(infered_out_shape.dim(3), 3);
+}
+
+TEST(ShapeInference, neg_Elementwise)
+{
+ Shape lhs_shape{1, 299, 299, 3};
+ Shape rhs_shape{5, 3};
+ ASSERT_THROW(onert::shape_inference::inferEltwiseShape(lhs_shape, rhs_shape), std::runtime_error);
+}
+
+TEST(ShapeInference, Pool2DNodeSame)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Stride stride{3, 7};
+ Padding padding{PaddingType::SAME};
+
+ operation::Pool2D::Param avg_pool_param{
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+
+ operation::Pool2D::Param max_pool_param{
+ operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+ infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+}
+
+TEST(ShapeInference, Pool2DNodeValid)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Stride stride{3, 7};
+ Padding padding{PaddingType::VALID};
+
+ operation::Pool2D::Param avg_pool_param{
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+
+ operation::Pool2D::Param max_pool_param{
+ operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+ infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+}
+
+TEST(ShapeInference, Pool2DNodeExplicit)
+{
+ Shape in_shape{10, 3, 5, 20};
+
+ Stride stride{3, 7};
+ Padding padding{4, 3, 2, 1};
+
+ operation::Pool2D::Param avg_pool_param{
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+
+ operation::Pool2D::Param max_pool_param{
+ operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+ infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+}
+
+TEST(ShapeInference, neg_Pool2DNode_InvalidStride)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Stride stride{0, 7};
+ Padding padding{PaddingType::SAME};
+
+ operation::Pool2D::Param avg_pool_param{
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ ASSERT_THROW(onert::shape_inference::inferPoolShape(in_shape, avg_pool_param),
+ std::runtime_error);
+}
+
+TEST(ShapeInference, Conv2D)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Shape ker_shape{30, 3, 6, 20};
+
+ operation::Conv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, Activation::NONE,
+ Dilation{1, 1}};
+ auto infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
+
+ param = operation::Conv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, Activation::NONE,
+ Dilation{1, 1}};
+ infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
+
+ param =
+ operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE, Dilation{1, 1}};
+ infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 3);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
+}
+
+TEST(ShapeInference, neg_Conv2D_InvalidStride)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Shape ker_shape{30, 3, 6, 20};
+
+ operation::Conv2D::Param param{Stride{0, 0}, Padding{PaddingType::VALID}, Activation::NONE,
+ Dilation{1, 1}};
+ ASSERT_THROW(onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param),
+ std::runtime_error);
+}
+
+TEST(ShapeInference, DepthwiseConv2D)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Shape ker_shape{1, 3, 6, 60};
+
+ operation::DepthwiseConv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, 3,
+ Activation::NONE, Dilation{1, 1}};
+ auto infered_out_shape =
+ onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
+
+ param = operation::DepthwiseConv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, 3,
+ Activation::NONE, Dilation{1, 1}};
+ infered_out_shape = onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
+
+ param = operation::DepthwiseConv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, 3, Activation::NONE,
+ Dilation{1, 1}};
+ infered_out_shape = onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 3);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
+}
+
+TEST(ShapeInference, neg_DepthwiseConv2D_InvalidStride)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Shape ker_shape{1, 3, 6, 60};
+
+ operation::DepthwiseConv2D::Param param{Stride{3, 0}, Padding{PaddingType::VALID}, 3,
+ Activation::NONE, Dilation{1, 1}};
+ ASSERT_THROW(onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param),
+ std::runtime_error);
+}
+
+TEST(ShapeInference, Concat)
+{
+ {
+ Shape in1{10, 20, 30, 3, 50};
+ Shape in2{10, 20, 30, 2, 50};
+ Shape in3{10, 20, 30, 2, 50};
+
+ operation::Concat::Param param{3};
+ auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2, in3}, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 5);
+ ASSERT_EQ(infered_out_shape.dim(0), 10);
+ ASSERT_EQ(infered_out_shape.dim(1), 20);
+ ASSERT_EQ(infered_out_shape.dim(2), 30);
+ ASSERT_EQ(infered_out_shape.dim(3), 7);
+ ASSERT_EQ(infered_out_shape.dim(4), 50);
+ }
+ {
+ // case 1. when axis < 0
+ Shape in1{10, 20, 2};
+ Shape in2{10, 20, 3};
+
+ operation::Concat::Param param{-1};
+ auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2}, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 3);
+ ASSERT_EQ(infered_out_shape.dim(0), 10);
+ ASSERT_EQ(infered_out_shape.dim(1), 20);
+ ASSERT_EQ(infered_out_shape.dim(2), 5);
+ }
+ {
+ // case 2. when axis < 0
+ Shape in1{2, 20, 2};
+ Shape in2{3, 20, 2};
+
+ operation::Concat::Param param{-3};
+ auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2}, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 3);
+ ASSERT_EQ(infered_out_shape.dim(0), 5);
+ ASSERT_EQ(infered_out_shape.dim(1), 20);
+ ASSERT_EQ(infered_out_shape.dim(2), 2);
+ }
+}
+
+TEST(ShapeInference, neg_Concat)
+{
+ {
+ operation::Concat::Param param{2};
+ Shape in1{10, 1, 3};
+    Shape in2{10, 2, 4}; // dim[1] should be 1 (to match in1), but is 2
+
+ EXPECT_ANY_THROW(onert::shape_inference::inferConcatShape({in1, in2}, param));
+ }
+ { // wrong rank
+ operation::Concat::Param param{2};
+ Shape in1{10, 2, 3, 4};
+ Shape in2{10, 2, 4}; // rank should be 4
+
+ EXPECT_ANY_THROW(onert::shape_inference::inferConcatShape({in1, in2}, param));
+ }
+}
+
+TEST(ShapeInference, ExpandDims)
+{
+ Shape in_shape{30, 40};
+
+ auto check = [&](int32_t axis, Shape &expected) {
+ auto actual = onert::shape_inference::inferExpandDimsShape(in_shape, axis);
+
+ ASSERT_EQ(actual.rank(), 3);
+ for (int32_t dim = 0; dim < expected.rank(); dim++)
+ ASSERT_EQ(actual.dim(dim), expected.dim(dim));
+ };
+
+ { // boundary
+ int32_t axis = 0;
+ Shape expected{1, 30, 40};
+ check(axis, expected);
+ }
+ { // boundary
+ int32_t axis = 2;
+ Shape expected{30, 40, 1};
+ check(axis, expected);
+ }
+ { // inside
+ int32_t axis = 1;
+ Shape expected{30, 1, 40};
+ check(axis, expected);
+ }
+ { // negative boundary
+ int32_t axis = -1;
+ Shape expected{30, 40, 1};
+ check(axis, expected);
+ }
+ { // negative boundary
+ int32_t axis = -3;
+ Shape expected{1, 30, 40};
+ check(axis, expected);
+ }
+}
+
+TEST(ShapeInference, neg_ExpandDims)
+{
+ Shape in_shape{30, 40};
+
+ { // over boundary
+ int32_t axis = 3;
+ ASSERT_THROW(onert::shape_inference::inferExpandDimsShape(in_shape, axis), std::runtime_error);
+ }
+ { // over boundary
+ int32_t axis = -4;
+ ASSERT_THROW(onert::shape_inference::inferExpandDimsShape(in_shape, axis), std::runtime_error);
+ }
+}
+
+TEST(ShapeInference, FullyConnected)
+{
+ Shape in_shape{3, 4, 5, 6};
+ Shape ker_shape{3, 10};
+ auto infered_out_shape = onert::shape_inference::inferFullyConnectedShape(in_shape, ker_shape);
+
+ ASSERT_EQ(infered_out_shape.rank(), 2);
+ ASSERT_EQ(infered_out_shape.dim(0), 36);
+ ASSERT_EQ(infered_out_shape.dim(1), 3);
+}
+
+TEST(ShapeInference, Transpose)
+{
+ auto check = [&](Shape &in_shape, std::vector<int> perm, Shape &expected) {
+ // pre-conditions
+ ASSERT_EQ(in_shape.rank(), perm.size());
+ ASSERT_EQ(expected.rank(), perm.size());
+ auto inferred_out_shape =
+ onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size());
+ // post-conditions
+ ASSERT_EQ(inferred_out_shape.rank(), perm.size());
+ for (int32_t dim = 0; dim < expected.rank(); dim++)
+ {
+ ASSERT_EQ(inferred_out_shape.dim(dim), expected.dim(dim));
+ }
+ };
+ // check for 2-D
+ {
+ Shape in_shape{2, 3};
+ std::vector<int> perm = {1, 0};
+ Shape expected{3, 2};
+ // int32_t rank = 2;
+ check(in_shape, perm, expected);
+ }
+ // check for 3-D
+ {
+ Shape in_shape{1, 2, 3};
+ std::vector<int> perm = {2, 0, 1};
+ Shape expected{3, 1, 2};
+ // int32_t rank = 3;
+ check(in_shape, perm, expected);
+ }
+ // check for 4-D
+ {
+ Shape in_shape{1, 2, 3, 4};
+ std::vector<int> perm = {1, 3, 0, 2};
+ Shape expected{2, 4, 1, 3};
+ // int32_t rank = 4;
+ check(in_shape, perm, expected);
+ }
+}
+
+TEST(ShapeInference, neg_Transpose)
+{
+ Shape in_shape{1, 2, 3};
+ // Invalid parameter size
+ {
+ std::vector<int> perm = {2, 0, 1, 0};
+ // int32_t rank = 3;
+ ASSERT_THROW(onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size()),
+ std::runtime_error);
+ }
+ // Invalid parameter value
+ {
+ std::vector<int> perm = {2, 0, 3};
+ // int32_t rank = 3;
+ ASSERT_THROW(onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size()),
+ std::runtime_error);
+ }
+}
+
+TEST(ShapeInference, Gather)
+{
+ auto check = [&](Shape &input, Shape &indices, Shape &expected, int32_t axis) {
+ int rank = input.rank();
+ auto actual = onert::shape_inference::inferGatherShape(input, indices, axis, rank);
+
+ ASSERT_EQ(actual.rank(), expected.rank());
+
+ for (int32_t dim = 0; dim < expected.rank(); dim++)
+ ASSERT_EQ(actual.dim(dim), expected.dim(dim));
+ };
+
+ // check for 2-D, 3-D, axis 0
+ {
+ Shape input{3, 4};
+ Shape indices{1, 1, 2};
+ int32_t axis = 0;
+ Shape expected{1, 1, 2, 4};
+ check(input, indices, expected, axis);
+ }
+
+ // check for 2-D, 3-D, axis 1
+ {
+ Shape input{3, 4};
+ Shape indices{1, 2, 1};
+ int32_t axis = 1;
+ Shape expected{3, 1, 2, 1};
+ check(input, indices, expected, axis);
+ }
+
+ // check for 3-D, 2-D, axis 0
+ {
+ Shape input{2, 3, 4};
+ Shape indices{1, 2};
+ int32_t axis = 0;
+ Shape expected{1, 2, 3, 4};
+ check(input, indices, expected, axis);
+ }
+
+ // check for 3-D, 2-D, axis 2
+ {
+ Shape input{2, 3, 4};
+ Shape indices{2, 1};
+ int32_t axis = 2;
+ Shape expected{2, 3, 2, 1};
+ check(input, indices, expected, axis);
+ }
+
+ // check for 4D, axis 0
+ {
+ Shape input{1, 2, 3, 4};
+ Shape indices{2};
+ int32_t axis = 0;
+ Shape expected{2, 2, 3, 4};
+ check(input, indices, expected, axis);
+ }
+}
+
+TEST(ShapeInference, BCQFullyConnected)
+{
+ auto check = [&](Shape &in_shape, Shape &cluster_shape, std::vector<int> cluster,
+ Shape &expected) {
+ auto actual =
+ onert::shape_inference::inferBCQFullyConnectedShape(in_shape, cluster_shape, cluster.data());
+ ASSERT_EQ(actual.rank(), expected.rank());
+
+ for (int32_t dim = 0; dim < expected.rank(); dim++)
+ ASSERT_EQ(actual.dim(dim), expected.dim(dim));
+ };
+
+ {
+ Shape in_shape{10, 1};
+ Shape cluster_shape{3, 2};
+ std::vector<int> cluster = {1, 10, 2, 10, 3, 10};
+
+ Shape expected{30, 1};
+ check(in_shape, cluster_shape, cluster, expected);
+ }
+
+ {
+ Shape in_shape{1, 1};
+ Shape cluster_shape{1, 2};
+ std::vector<int> cluster = {3, 50};
+
+ Shape expected{50, 1};
+ check(in_shape, cluster_shape, cluster, expected);
+ }
+}
+
+TEST(ShapeInference, BCQGather)
+{
+ auto check = [&](Shape &indices_shape, Shape &cluster_shape, std::vector<int> cluster,
+ uint32_t hidden_size, uint32_t axis, int rank, Shape &expected) {
+ operation::BCQGather::Param param{hidden_size, axis};
+ auto actual = onert::shape_inference::inferBCQGatherShape(indices_shape, cluster_shape,
+ cluster.data(), rank, param);
+ ASSERT_EQ(actual.rank(), expected.rank());
+
+ for (int32_t dim = 0; dim < expected.rank(); dim++)
+ ASSERT_EQ(actual.dim(dim), expected.dim(dim));
+ };
+
+ {
+ Shape indices_shape{5, 1};
+ Shape cluster_shape{3, 2};
+ std::vector<int> cluster = {1, 10, 2, 10, 3, 10};
+ uint32_t hidden_size = 10;
+ uint32_t axis = 0;
+ int rank = 2;
+
+ Shape expected{5, 1, 10};
+ check(indices_shape, cluster_shape, cluster, hidden_size, axis, rank, expected);
+ }
+
+ {
+ Shape indices_shape{5, 1};
+ Shape cluster_shape{3, 2};
+ std::vector<int> cluster = {1, 10, 2, 10, 3, 10};
+ uint32_t hidden_size = 10;
+ uint32_t axis = 1;
+ int rank = 2;
+
+ Shape expected{30, 5, 1};
+ check(indices_shape, cluster_shape, cluster, hidden_size, axis, rank, expected);
+ }
+}
diff --git a/runtime/onert/core/src/util/TracingCtx.cc b/runtime/onert/core/src/util/TracingCtx.cc
new file mode 100644
index 000000000..c05baee60
--- /dev/null
+++ b/runtime/onert/core/src/util/TracingCtx.cc
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/TracingCtx.h"
+
+namespace onert
+{
+namespace util
+{
+
+// initializing static member var
+std::mutex TracingCtx::_session_id_mutex;
+uint32_t TracingCtx::_next_session_id = 0;
+
+} // namespace util
+} // namespace onert
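
These two statics back the usual mutex-guarded counter pattern for handing out process-wide unique session ids. A standalone sketch of that pattern (illustrative only; the class and member names here are hypothetical and do not reproduce the actual TracingCtx interface):

#include <cstdint>
#include <mutex>

class SessionIdSource
{
public:
  // Returns a process-wide unique, monotonically increasing id.
  static uint32_t next()
  {
    std::lock_guard<std::mutex> lock(_mutex);
    return _next_id++;
  }

private:
  static std::mutex _mutex;
  static uint32_t _next_id;
};

// Out-of-line definitions of the statics, mirroring the two definitions above.
std::mutex SessionIdSource::_mutex;
uint32_t SessionIdSource::_next_id = 0;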